1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
   70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
   71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
   72 
   73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
   74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
   75 
   76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
   77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
   78 
   79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
   80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
   81 
   82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
   83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
   84 
   85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
   86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
   87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
   89 #ifdef _WIN64
   90 
   91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
   92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
   93 
   94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
   95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
   96 
   97 #else
   98 
   99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
  100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
  101 
  102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
  103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
  104 
  105 #endif
  106 
  107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
  108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
  109 
  110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
  111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
  112 
  113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
  114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  115 
  116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
  117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  118 
  119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
  120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
  121 
  122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
  123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
  124 
  125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
  126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
  127 
  128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
  129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
  132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
  133 
  134 reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
  135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
  136 
  137 reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
  138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
  139 
  140 reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
  141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
  142 
  143 reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
  144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
  145 
  146 reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
  147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
  148 
  149 reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
  150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
  151 
  152 reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
  153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
  154 
  155 reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
  156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
  157 
  158 reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
  159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
  160 
  161 reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
  162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
  163 
  164 reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
  165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
  166 
  167 reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
  168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
  169 
  170 reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
  171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
  172 
  173 reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
  174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
  175 
  176 reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
  177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // Floating Point Registers
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
  188 
  189 alloc_class chunk0(R10,         R10_H,
  190                    R11,         R11_H,
  191                    R8,          R8_H,
  192                    R9,          R9_H,
  193                    R12,         R12_H,
  194                    RCX,         RCX_H,
  195                    RBX,         RBX_H,
  196                    RDI,         RDI_H,
  197                    RDX,         RDX_H,
  198                    RSI,         RSI_H,
  199                    RAX,         RAX_H,
  200                    RBP,         RBP_H,
  201                    R13,         R13_H,
  202                    R14,         R14_H,
  203                    R15,         R15_H,
  204                    R16,         R16_H,
  205                    R17,         R17_H,
  206                    R18,         R18_H,
  207                    R19,         R19_H,
  208                    R20,         R20_H,
  209                    R21,         R21_H,
  210                    R22,         R22_H,
  211                    R23,         R23_H,
  212                    R24,         R24_H,
  213                    R25,         R25_H,
  214                    R26,         R26_H,
  215                    R27,         R27_H,
  216                    R28,         R28_H,
  217                    R29,         R29_H,
  218                    R30,         R30_H,
  219                    R31,         R31_H,
  220                    RSP,         RSP_H);
  221 
  222 // XMM registers.  512-bit registers or 8 words each, labeled (a)-p.
  223 // Word a in each register holds a Float, words ab hold a Double.
  224 // The whole registers are used in SSE4.2 version intrinsics,
  225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  226 // UseXMMForArrayCopy and UseSuperword flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
  236 
  237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  253 
  254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
  260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
  261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
  262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
  263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
  264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
  265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
  266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
  267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
  268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
  269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
  270 
  271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
  272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
  273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
  274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
  275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
  276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
  277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
  278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
  279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
  280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
  281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
  282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
  283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
  284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
  285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
  286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
  287 
  288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
  289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
  290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
  291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
  292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
  293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
  294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
  295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
  296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
  297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
  298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
  299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
  300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
  301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
  302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
  303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
  304 
  305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
  306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
  307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
  308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
  309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
  310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
  311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
  312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
  313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
  314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
  315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
  316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
  317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
  318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
  319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
  320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
  321 
  322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
  323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
  324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
  325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
  326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
  327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
  328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
  329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
  330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
  331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
  332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
  333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
  334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
  335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
  336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
  337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
  338 
  339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
  340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
  341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
  342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
  343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
  344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
  345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
  346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
  347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
  348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
  349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
  350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
  351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
  352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
  353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
  354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
  355 
  356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
  357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
  358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
  359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
  360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
  361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
  362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
  363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
  364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
  365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
  366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
  367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
  368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
  369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
  370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
  371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
  373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
  374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
  375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
  376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
  377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
  378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
  379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
  380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
  381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
  382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
  383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
  384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
  385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
  386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
  387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
  388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
  389 
  390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
  391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
  392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
  393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
  394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
  395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
  396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
  397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
  398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
  399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
  400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
  401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
  402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
  403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
  404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
  405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
  406 
  407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
  408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
  409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
  410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
  411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
  412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
  413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
  414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
  415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
  416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
  417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
  418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
  419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
  420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
  421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
  422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
  423 
  424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
  425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
  426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
  427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
  428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
  429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
  430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
  431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
  432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
  433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
  434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
  435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
  436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
  437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
  438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
  439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
  440 
  441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
  442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
  443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
  444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
  445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
  446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
  447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
  448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
  449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
  450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
  451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
  452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
  453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
  454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
  455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
  456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
  457 
  458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
  459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
  460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
  461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
  462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
  463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
  464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
  465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
  466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
  467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
  468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
  469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
  470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
  471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
  472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
  473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
  474 
  475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
  476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
  477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
  478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
  479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
  480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
  481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
  482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
  483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
  484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
  485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
  486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
  487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
  488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
  489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
  490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
  491 
  492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
  493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
  494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
  495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
  496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
  497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
  498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
  499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
  500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
  501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
  502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
  503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
  504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
  505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
  506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
  507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
  509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
  510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
  511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
  512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
  513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
  514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
  515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
  516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
  517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
  518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
  519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
  520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
  521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
  522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
  523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
  524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
  525 
  526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
  527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
  528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
  529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
  530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
  531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
  532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
  533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
  534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
  535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
  536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
  537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
  538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
  539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
  540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
  541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
  542 
  543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
  544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
  545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
  546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
  547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
  548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
  549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
  550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
  551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
  552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
  553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
  554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
  555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
  556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
  557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
  558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
  559 
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
  576 
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
  593 
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
  610 
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
  627 
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
  644 
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
  661 
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
  678 
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
  695 
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
  712 
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
  729 
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
  746 
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
  763 
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
  780 
  781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
  782 
  783 // AVX3 Mask Registers.
  784 reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
  785 reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());
  786 
  787 reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
  788 reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());
  789 
  790 reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
  791 reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());
  792 
  793 reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
  794 reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());
  795 
  796 reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
  797 reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());
  798 
  799 reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
  800 reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());
  801 
  802 reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
  803 reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
  812 
  813 // Empty register class.
  814 reg_class no_reg();
  815 
  816 // Class for all pointer/long registers including APX extended GPRs.
  817 reg_class all_reg(RAX, RAX_H,
  818                   RDX, RDX_H,
  819                   RBP, RBP_H,
  820                   RDI, RDI_H,
  821                   RSI, RSI_H,
  822                   RCX, RCX_H,
  823                   RBX, RBX_H,
  824                   RSP, RSP_H,
  825                   R8,  R8_H,
  826                   R9,  R9_H,
  827                   R10, R10_H,
  828                   R11, R11_H,
  829                   R12, R12_H,
  830                   R13, R13_H,
  831                   R14, R14_H,
  832                   R15, R15_H,
  833                   R16, R16_H,
  834                   R17, R17_H,
  835                   R18, R18_H,
  836                   R19, R19_H,
  837                   R20, R20_H,
  838                   R21, R21_H,
  839                   R22, R22_H,
  840                   R23, R23_H,
  841                   R24, R24_H,
  842                   R25, R25_H,
  843                   R26, R26_H,
  844                   R27, R27_H,
  845                   R28, R28_H,
  846                   R29, R29_H,
  847                   R30, R30_H,
  848                   R31, R31_H);
  849 
  850 // Class for all int registers including APX extended GPRs.
  851 reg_class all_int_reg(RAX
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14,
  865                       R16,
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
  881 
  882 // Class for all pointer registers
  883 reg_class any_reg %{
  884   return _ANY_REG_mask;
  885 %}
  886 
  887 // Class for all pointer registers (excluding RSP)
  888 reg_class ptr_reg %{
  889   return _PTR_REG_mask;
  890 %}
  891 
  892 // Class for all pointer registers (excluding RSP and RBP)
  893 reg_class ptr_reg_no_rbp %{
  894   return _PTR_REG_NO_RBP_mask;
  895 %}
  896 
  897 // Class for all pointer registers (excluding RAX and RSP)
  898 reg_class ptr_no_rax_reg %{
  899   return _PTR_NO_RAX_REG_mask;
  900 %}
  901 
  902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
  903 reg_class ptr_no_rax_rbx_reg %{
  904   return _PTR_NO_RAX_RBX_REG_mask;
  905 %}
  906 
  907 // Class for all long registers (excluding RSP)
  908 reg_class long_reg %{
  909   return _LONG_REG_mask;
  910 %}
  911 
  912 // Class for all long registers (excluding RAX, RDX and RSP)
  913 reg_class long_no_rax_rdx_reg %{
  914   return _LONG_NO_RAX_RDX_REG_mask;
  915 %}
  916 
  917 // Class for all long registers (excluding RCX and RSP)
  918 reg_class long_no_rcx_reg %{
  919   return _LONG_NO_RCX_REG_mask;
  920 %}
  921 
  922 // Class for all long registers (excluding RBP and R13)
  923 reg_class long_no_rbp_r13_reg %{
  924   return _LONG_NO_RBP_R13_REG_mask;
  925 %}
  926 
  927 // Class for all int registers (excluding RSP)
  928 reg_class int_reg %{
  929   return _INT_REG_mask;
  930 %}
  931 
  932 // Class for all int registers (excluding RAX, RDX, and RSP)
  933 reg_class int_no_rax_rdx_reg %{
  934   return _INT_NO_RAX_RDX_REG_mask;
  935 %}
  936 
  937 // Class for all int registers (excluding RCX and RSP)
  938 reg_class int_no_rcx_reg %{
  939   return _INT_NO_RCX_REG_mask;
  940 %}
  941 
  942 // Class for all int registers (excluding RBP and R13)
  943 reg_class int_no_rbp_r13_reg %{
  944   return _INT_NO_RBP_R13_REG_mask;
  945 %}
  946 
  947 // Singleton class for RAX pointer register
  948 reg_class ptr_rax_reg(RAX, RAX_H);
  949 
  950 // Singleton class for RBX pointer register
  951 reg_class ptr_rbx_reg(RBX, RBX_H);
  952 
  953 // Singleton class for RSI pointer register
  954 reg_class ptr_rsi_reg(RSI, RSI_H);
  955 
  956 // Singleton class for RBP pointer register
  957 reg_class ptr_rbp_reg(RBP, RBP_H);
  958 
  959 // Singleton class for RDI pointer register
  960 reg_class ptr_rdi_reg(RDI, RDI_H);
  961 
  962 // Singleton class for stack pointer
  963 reg_class ptr_rsp_reg(RSP, RSP_H);
  964 
  965 // Singleton class for TLS pointer
  966 reg_class ptr_r15_reg(R15, R15_H);
  967 
  968 // Singleton class for RAX long register
  969 reg_class long_rax_reg(RAX, RAX_H);
  970 
  971 // Singleton class for RCX long register
  972 reg_class long_rcx_reg(RCX, RCX_H);
  973 
  974 // Singleton class for RDX long register
  975 reg_class long_rdx_reg(RDX, RDX_H);
  976 
  977 // Singleton class for R11 long register
  978 reg_class long_r11_reg(R11, R11_H);
  979 
  980 // Singleton class for RAX int register
  981 reg_class int_rax_reg(RAX);
  982 
  983 // Singleton class for RBX int register
  984 reg_class int_rbx_reg(RBX);
  985 
  986 // Singleton class for RCX int register
  987 reg_class int_rcx_reg(RCX);
  988 
  989 // Singleton class for RDX int register
  990 reg_class int_rdx_reg(RDX);
  991 
  992 // Singleton class for RDI int register
  993 reg_class int_rdi_reg(RDI);
  994 
  995 // Singleton class for instruction pointer
  996 // reg_class ip_reg(RIP);
  997 
  998 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
  999                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1000                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1001                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1002                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1003                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1004                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1005                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1006                    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1007                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1008                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1009                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1010                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1011                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1012                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1013                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1014                    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1015                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1016                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1017                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1018                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1019                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1020                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1021                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1022                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1023                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1024                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1025                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1026                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
// Class for pre-EVEX (legacy) float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
// Class for EVEX float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
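// reg_class_dynamic picks between two statically defined register classes at
// runtime: the first (EVEX) class is used when the trailing predicate evaluates
// to true, otherwise the second (legacy) class is used.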
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
// Class for pre-EVEX (legacy) double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
// Class for EVEX double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre-EVEX (legacy) 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
// Class for EVEX 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre-EVEX (legacy) 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for EVEX 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre-EVEX (legacy) 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for EVEX 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre-EVEX (legacy) 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for EVEX 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for EVEX 512bit vector registers (XMM0-XMM31)
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted (pre-EVEX legacy, XMM0-XMM15 only) 512bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
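// Returns true when each bound of the CastLL's long type is either unbounded
// or representable as a signed 32-bit immediate.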
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
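// R12 holds the compressed oop heap base when compressed oops are in use, so it
// must then be excluded from the allocatable register masks.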
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
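  // The APX extended GPRs (r16-r31) are only allocatable when UseAPX is enabled.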
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
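// Returns true when a vzeroupper should be emitted around calls and returns:
// compiled code that used wide vectors may have dirtied the upper YMM/ZMM bits,
// and vzeroupper avoids the AVX <-> SSE transition penalty.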
 1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
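  // A direct call is encoded as E8 + rel32: one opcode byte plus a 4-byte displacement.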
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
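  // The 15 bytes cover a 10-byte movabs (REX.W opcode + imm64) that sets up the
  // inline-cache register, followed by a 5-byte call rel32.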
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
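  // 10-byte movabs of the entry point into r10 plus a 3-byte indirect call through r10.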
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;
 1683 }
 1684 
 1685 // This could be in MacroAssembler but it's fairly C2 specific
 1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1687   Label exit;
 1688   __ jccb(Assembler::noParity, exit);
 1689   __ pushf();
 1690   //
 1691   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1692   // zero OF,AF,SF for NaN values.
 1693   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1694   // values returns 'less than' result (CF is set).
 1695   // Leave the rest of flags unchanged.
 1696   //
 1697   //    7 6 5 4 3 2 1 0
 1698   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1699   //    0 0 1 0 1 0 1 1   (0x2B)
 1700   //
 1701   __ andq(Address(rsp, 0), 0xffffff2b);
 1702   __ popf();
 1703   __ bind(exit);
 1704 }
 1705 
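// Produces a three-way comparison result in dst: -1 if less than or unordered,
// 0 if equal, +1 if greater.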
 1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // Floating point comparisons set CF=1 for the unordered case (at least one
  // input is NaN), so the 'below' branch also covers NaN inputs.
 1709   Label done;
 1710   __ movl(dst, -1);
 1711   __ jcc(Assembler::below, done);
 1712   __ setcc(Assembler::notEqual, dst);
 1713   __ bind(done);
 1714 }
 1715 
 1716 // Math.min()    # Math.max()
 1717 // --------------------------
 1718 // ucomis[s/d]   #
 1719 // ja   -> b     # a
 1720 // jp   -> NaN   # NaN
 1721 // jb   -> a     # b
 1722 // je            #
 1723 // |-jz -> a | b # a & b
 1724 // |    -> a     #
 1725 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1726                             XMMRegister a, XMMRegister b,
 1727                             XMMRegister xmmt, Register rt,
 1728                             bool min, bool single) {
 1729 
 1730   Label nan, zero, below, above, done;
 1731 
 1732   if (single)
 1733     __ ucomiss(a, b);
 1734   else
 1735     __ ucomisd(a, b);
 1736 
 1737   if (dst->encoding() != (min ? b : a)->encoding())
 1738     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1739   else
 1740     __ jccb(Assembler::above, done);
 1741 
 1742   __ jccb(Assembler::parity, nan);  // PF=1
 1743   __ jccb(Assembler::below, below); // CF=1
 1744 
 1745   // equal
 1746   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1747   if (single) {
 1748     __ ucomiss(a, xmmt);
 1749     __ jccb(Assembler::equal, zero);
 1750 
 1751     __ movflt(dst, a);
 1752     __ jmp(done);
 1753   }
 1754   else {
 1755     __ ucomisd(a, xmmt);
 1756     __ jccb(Assembler::equal, zero);
 1757 
 1758     __ movdbl(dst, a);
 1759     __ jmp(done);
 1760   }
 1761 
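  // Both inputs compared equal and are zero, so the sign bit decides the result:
  // min(-0.0, +0.0) is -0.0 (OR the sign bits), max(-0.0, +0.0) is +0.0 (AND them).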
 1762   __ bind(zero);
 1763   if (min)
 1764     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1765   else
 1766     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1767 
 1768   __ jmp(done);
 1769 
 1770   __ bind(above);
 1771   if (single)
 1772     __ movflt(dst, min ? b : a);
 1773   else
 1774     __ movdbl(dst, min ? b : a);
 1775 
 1776   __ jmp(done);
 1777 
 1778   __ bind(nan);
 1779   if (single) {
 1780     __ movl(rt, 0x7fc00000); // Float.NaN
 1781     __ movdl(dst, rt);
 1782   }
 1783   else {
 1784     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1785     __ movdq(dst, rt);
 1786   }
 1787   __ jmp(done);
 1788 
 1789   __ bind(below);
 1790   if (single)
 1791     __ movflt(dst, min ? a : b);
 1792   else
 1793     __ movdbl(dst, min ? a : b);
 1794 
 1795   __ bind(done);
 1796 }
 1797 
 1798 //=============================================================================
 1799 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1800 
 1801 int ConstantTable::calculate_table_base_offset() const {
 1802   return 0;  // absolute addressing, no offset
 1803 }
 1804 
 1805 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1806 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1807   ShouldNotReachHere();
 1808 }
 1809 
 1810 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1811   // Empty encoding
 1812 }
 1813 
 1814 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1815   return 0;
 1816 }
 1817 
 1818 #ifndef PRODUCT
 1819 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1820   st->print("# MachConstantBaseNode (empty encoding)");
 1821 }
 1822 #endif
 1823 
 1824 
 1825 //=============================================================================
 1826 #ifndef PRODUCT
 1827 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1828   Compile* C = ra_->C;
 1829 
 1830   int framesize = C->output()->frame_size_in_bytes();
 1831   int bangsize = C->output()->bang_size_in_bytes();
 1832   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1833   // Remove wordSize for return addr which is already pushed.
 1834   framesize -= wordSize;
 1835 
 1836   if (C->output()->need_stack_bang(bangsize)) {
 1837     framesize -= wordSize;
 1838     st->print("# stack bang (%d bytes)", bangsize);
 1839     st->print("\n\t");
 1840     st->print("pushq   rbp\t# Save rbp");
 1841     if (PreserveFramePointer) {
 1842         st->print("\n\t");
 1843         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1844     }
 1845     if (framesize) {
 1846       st->print("\n\t");
 1847       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1848     }
 1849   } else {
 1850     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1851     st->print("\n\t");
 1852     framesize -= wordSize;
 1853     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1854     if (PreserveFramePointer) {
 1855       st->print("\n\t");
 1856       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1857       if (framesize > 0) {
 1858         st->print("\n\t");
 1859         st->print("addq    rbp, #%d", framesize);
 1860       }
 1861     }
 1862   }
 1863 
 1864   if (VerifyStackAtCalls) {
 1865     st->print("\n\t");
 1866     framesize -= wordSize;
 1867     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1868 #ifdef ASSERT
 1869     st->print("\n\t");
 1870     st->print("# stack alignment check");
 1871 #endif
 1872   }
 1873   if (C->stub_function() != nullptr) {
 1874     st->print("\n\t");
 1875     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1876     st->print("\n\t");
 1877     st->print("je      fast_entry\t");
 1878     st->print("\n\t");
 1879     st->print("call    #nmethod_entry_barrier_stub\t");
 1880     st->print("\n\tfast_entry:");
 1881   }
 1882   st->cr();
 1883 }
 1884 #endif
 1885 
 1886 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1887   Compile* C = ra_->C;
 1888 
 1889   __ verified_entry(C);
 1890 
 1891   if (ra_->C->stub_function() == nullptr) {
 1892     __ entry_barrier();
 1893   }
 1894 
 1895   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1896     __ bind(*_verified_entry);
 1897   }
 1898 
 1899   C->output()->set_frame_complete(__ offset());
 1900 
 1901   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because code that uses the
    // constant table may be emitted before MachConstantBaseNode.
 1904     ConstantTable& constant_table = C->output()->constant_table();
 1905     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1906   }
 1907 }
 1908 
 1909 
 1910 int MachPrologNode::reloc() const
 1911 {
 1912   return 0; // a large enough number
 1913 }
 1914 
 1915 //=============================================================================
 1916 #ifndef PRODUCT
 1917 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1918 {
 1919   Compile* C = ra_->C;
 1920   if (generate_vzeroupper(C)) {
 1921     st->print("vzeroupper");
 1922     st->cr(); st->print("\t");
 1923   }
 1924 
 1925   int framesize = C->output()->frame_size_in_bytes();
 1926   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1927   // Remove word for return adr already pushed
 1928   // and RBP
 1929   framesize -= 2*wordSize;
 1930 
 1931   if (framesize) {
 1932     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1933     st->print("\t");
 1934   }
 1935 
 1936   st->print_cr("popq    rbp");
 1937   if (do_polling() && C->is_method_compilation()) {
 1938     st->print("\t");
 1939     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1940                  "ja      #safepoint_stub\t"
 1941                  "# Safepoint: poll for GC");
 1942   }
 1943 }
 1944 #endif
 1945 
 1946 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1947 {
 1948   Compile* C = ra_->C;
 1949 
 1950   if (generate_vzeroupper(C)) {
 1951     // Clear upper bits of YMM registers when current compiled code uses
 1952     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1953     __ vzeroupper();
 1954   }
 1955 
 1956   // Subtract two words to account for return address and rbp
 1957   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1958   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1959 
 1960   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1961     __ reserved_stack_check();
 1962   }
 1963 
 1964   if (do_polling() && C->is_method_compilation()) {
 1965     Label dummy_label;
 1966     Label* code_stub = &dummy_label;
 1967     if (!C->output()->in_scratch_emit_size()) {
 1968       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1969       C->output()->add_stub(stub);
 1970       code_stub = &stub->entry();
 1971     }
 1972     __ relocate(relocInfo::poll_return_type);
 1973     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1974   }
 1975 }
 1976 
 1977 int MachEpilogNode::reloc() const
 1978 {
 1979   return 2; // a large enough number
 1980 }
 1981 
 1982 const Pipeline* MachEpilogNode::pipeline() const
 1983 {
 1984   return MachNode::pipeline_class();
 1985 }
 1986 
 1987 //=============================================================================
 1988 
 1989 enum RC {
 1990   rc_bad,
 1991   rc_int,
 1992   rc_kreg,
 1993   rc_float,
 1994   rc_stack
 1995 };
 1996 
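// Map an OptoReg to the coarse register class used by the spill copy code below.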
 1997 static enum RC rc_class(OptoReg::Name reg)
 1998 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2000 
 2001   if (OptoReg::is_stack(reg)) return rc_stack;
 2002 
 2003   VMReg r = OptoReg::as_VMReg(reg);
 2004 
 2005   if (r->is_Register()) return rc_int;
 2006 
 2007   if (r->is_KRegister()) return rc_kreg;
 2008 
 2009   assert(r->is_XMMRegister(), "must be");
 2010   return rc_float;
 2011 }
 2012 
 2013 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2014 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2015                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2016 
 2017 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2018                      int stack_offset, int reg, uint ireg, outputStream* st);
 2019 
 2020 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2021                                       int dst_offset, uint ireg, outputStream* st) {
 2022   if (masm) {
 2023     switch (ireg) {
 2024     case Op_VecS:
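      // Use rax as a scratch register: park its value just below rsp, copy the
      // 32-bit value through it, then restore rax.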
 2025       __ movq(Address(rsp, -8), rax);
 2026       __ movl(rax, Address(rsp, src_offset));
 2027       __ movl(Address(rsp, dst_offset), rax);
 2028       __ movq(rax, Address(rsp, -8));
 2029       break;
 2030     case Op_VecD:
 2031       __ pushq(Address(rsp, src_offset));
 2032       __ popq (Address(rsp, dst_offset));
 2033       break;
 2034     case Op_VecX:
 2035       __ pushq(Address(rsp, src_offset));
 2036       __ popq (Address(rsp, dst_offset));
 2037       __ pushq(Address(rsp, src_offset+8));
 2038       __ popq (Address(rsp, dst_offset+8));
 2039       break;
 2040     case Op_VecY:
 2041       __ vmovdqu(Address(rsp, -32), xmm0);
 2042       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2043       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2044       __ vmovdqu(xmm0, Address(rsp, -32));
 2045       break;
 2046     case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, src_offset), Assembler::AVX_512bit);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, -64), Assembler::AVX_512bit);
 2051       break;
 2052     default:
 2053       ShouldNotReachHere();
 2054     }
 2055 #ifndef PRODUCT
 2056   } else {
 2057     switch (ireg) {
 2058     case Op_VecS:
 2059       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2060                 "movl    rax, [rsp + #%d]\n\t"
 2061                 "movl    [rsp + #%d], rax\n\t"
 2062                 "movq    rax, [rsp - #8]",
 2063                 src_offset, dst_offset);
 2064       break;
 2065     case Op_VecD:
 2066       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2067                 "popq    [rsp + #%d]",
 2068                 src_offset, dst_offset);
 2069       break;
 2070      case Op_VecX:
 2071       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2072                 "popq    [rsp + #%d]\n\t"
 2073                 "pushq   [rsp + #%d]\n\t"
 2074                 "popq    [rsp + #%d]",
 2075                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2076       break;
 2077     case Op_VecY:
 2078       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2079                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2080                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2081                 "vmovdqu xmm0, [rsp - #32]",
 2082                 src_offset, dst_offset);
 2083       break;
 2084     case Op_VecZ:
 2085       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2086                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2087                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2088                 "vmovdqu xmm0, [rsp - #64]",
 2089                 src_offset, dst_offset);
 2090       break;
 2091     default:
 2092       ShouldNotReachHere();
 2093     }
 2094 #endif
 2095   }
 2096 }
 2097 
 2098 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2099                                        PhaseRegAlloc* ra_,
 2100                                        bool do_size,
 2101                                        outputStream* st) const {
 2102   assert(masm != nullptr || st  != nullptr, "sanity");
 2103   // Get registers to move
 2104   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2105   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2106   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2107   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2108 
 2109   enum RC src_second_rc = rc_class(src_second);
 2110   enum RC src_first_rc = rc_class(src_first);
 2111   enum RC dst_second_rc = rc_class(dst_second);
 2112   enum RC dst_first_rc = rc_class(dst_first);
 2113 
 2114   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2115          "must move at least 1 register" );
 2116 
 2117   if (src_first == dst_first && src_second == dst_second) {
 2118     // Self copy, no move
 2119     return 0;
 2120   }
 2121   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2122     uint ireg = ideal_reg();
 2123     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2124     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
 2138     } else {
 2139       ShouldNotReachHere();
 2140     }
 2141     return 0;
 2142   }
 2143   if (src_first_rc == rc_stack) {
 2144     // mem ->
 2145     if (dst_first_rc == rc_stack) {
 2146       // mem -> mem
 2147       assert(src_second != dst_first, "overlap");
 2148       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2149           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2150         // 64-bit
 2151         int src_offset = ra_->reg2offset(src_first);
 2152         int dst_offset = ra_->reg2offset(dst_first);
 2153         if (masm) {
 2154           __ pushq(Address(rsp, src_offset));
 2155           __ popq (Address(rsp, dst_offset));
 2156 #ifndef PRODUCT
 2157         } else {
 2158           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2159                     "popq    [rsp + #%d]",
 2160                      src_offset, dst_offset);
 2161 #endif
 2162         }
 2163       } else {
 2164         // 32-bit
 2165         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2166         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2167         // No pushl/popl, so:
 2168         int src_offset = ra_->reg2offset(src_first);
 2169         int dst_offset = ra_->reg2offset(dst_first);
 2170         if (masm) {
 2171           __ movq(Address(rsp, -8), rax);
 2172           __ movl(rax, Address(rsp, src_offset));
 2173           __ movl(Address(rsp, dst_offset), rax);
 2174           __ movq(rax, Address(rsp, -8));
 2175 #ifndef PRODUCT
 2176         } else {
 2177           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2178                     "movl    rax, [rsp + #%d]\n\t"
 2179                     "movl    [rsp + #%d], rax\n\t"
 2180                     "movq    rax, [rsp - #8]",
 2181                      src_offset, dst_offset);
 2182 #endif
 2183         }
 2184       }
 2185       return 0;
 2186     } else if (dst_first_rc == rc_int) {
 2187       // mem -> gpr
 2188       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2189           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2190         // 64-bit
 2191         int offset = ra_->reg2offset(src_first);
 2192         if (masm) {
 2193           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2194 #ifndef PRODUCT
 2195         } else {
 2196           st->print("movq    %s, [rsp + #%d]\t# spill",
 2197                      Matcher::regName[dst_first],
 2198                      offset);
 2199 #endif
 2200         }
 2201       } else {
 2202         // 32-bit
 2203         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2204         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2205         int offset = ra_->reg2offset(src_first);
 2206         if (masm) {
 2207           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2208 #ifndef PRODUCT
 2209         } else {
 2210           st->print("movl    %s, [rsp + #%d]\t# spill",
 2211                      Matcher::regName[dst_first],
 2212                      offset);
 2213 #endif
 2214         }
 2215       }
 2216       return 0;
 2217     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2219       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2220           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2221         // 64-bit
 2222         int offset = ra_->reg2offset(src_first);
 2223         if (masm) {
 2224           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2225 #ifndef PRODUCT
 2226         } else {
 2227           st->print("%s  %s, [rsp + #%d]\t# spill",
 2228                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2229                      Matcher::regName[dst_first],
 2230                      offset);
 2231 #endif
 2232         }
 2233       } else {
 2234         // 32-bit
 2235         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2236         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2237         int offset = ra_->reg2offset(src_first);
 2238         if (masm) {
 2239           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2240 #ifndef PRODUCT
 2241         } else {
 2242           st->print("movss   %s, [rsp + #%d]\t# spill",
 2243                      Matcher::regName[dst_first],
 2244                      offset);
 2245 #endif
 2246         }
 2247       }
 2248       return 0;
 2249     } else if (dst_first_rc == rc_kreg) {
 2250       // mem -> kreg
 2251       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2252           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2253         // 64-bit
 2254         int offset = ra_->reg2offset(src_first);
 2255         if (masm) {
 2256           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2257 #ifndef PRODUCT
 2258         } else {
 2259           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2260                      Matcher::regName[dst_first],
 2261                      offset);
 2262 #endif
 2263         }
 2264       }
 2265       return 0;
 2266     }
 2267   } else if (src_first_rc == rc_int) {
 2268     // gpr ->
 2269     if (dst_first_rc == rc_stack) {
 2270       // gpr -> mem
 2271       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2272           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2273         // 64-bit
 2274         int offset = ra_->reg2offset(dst_first);
 2275         if (masm) {
 2276           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2277 #ifndef PRODUCT
 2278         } else {
 2279           st->print("movq    [rsp + #%d], %s\t# spill",
 2280                      offset,
 2281                      Matcher::regName[src_first]);
 2282 #endif
 2283         }
 2284       } else {
 2285         // 32-bit
 2286         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2287         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2288         int offset = ra_->reg2offset(dst_first);
 2289         if (masm) {
 2290           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2291 #ifndef PRODUCT
 2292         } else {
 2293           st->print("movl    [rsp + #%d], %s\t# spill",
 2294                      offset,
 2295                      Matcher::regName[src_first]);
 2296 #endif
 2297         }
 2298       }
 2299       return 0;
 2300     } else if (dst_first_rc == rc_int) {
 2301       // gpr -> gpr
 2302       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2303           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2304         // 64-bit
 2305         if (masm) {
 2306           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2307                   as_Register(Matcher::_regEncode[src_first]));
 2308 #ifndef PRODUCT
 2309         } else {
 2310           st->print("movq    %s, %s\t# spill",
 2311                      Matcher::regName[dst_first],
 2312                      Matcher::regName[src_first]);
 2313 #endif
 2314         }
 2315         return 0;
 2316       } else {
 2317         // 32-bit
 2318         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2319         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2320         if (masm) {
 2321           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2322                   as_Register(Matcher::_regEncode[src_first]));
 2323 #ifndef PRODUCT
 2324         } else {
 2325           st->print("movl    %s, %s\t# spill",
 2326                      Matcher::regName[dst_first],
 2327                      Matcher::regName[src_first]);
 2328 #endif
 2329         }
 2330         return 0;
 2331       }
 2332     } else if (dst_first_rc == rc_float) {
 2333       // gpr -> xmm
 2334       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2335           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2336         // 64-bit
 2337         if (masm) {
 2338           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2339 #ifndef PRODUCT
 2340         } else {
 2341           st->print("movdq   %s, %s\t# spill",
 2342                      Matcher::regName[dst_first],
 2343                      Matcher::regName[src_first]);
 2344 #endif
 2345         }
 2346       } else {
 2347         // 32-bit
 2348         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2349         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2350         if (masm) {
 2351           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2352 #ifndef PRODUCT
 2353         } else {
 2354           st->print("movdl   %s, %s\t# spill",
 2355                      Matcher::regName[dst_first],
 2356                      Matcher::regName[src_first]);
 2357 #endif
 2358         }
 2359       }
 2360       return 0;
 2361     } else if (dst_first_rc == rc_kreg) {
 2362       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2363           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2364         // 64-bit
 2365         if (masm) {
 2366           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2373         }
 2374       }
 2375       Unimplemented();
 2376       return 0;
 2377     }
 2378   } else if (src_first_rc == rc_float) {
 2379     // xmm ->
 2380     if (dst_first_rc == rc_stack) {
 2381       // xmm -> mem
 2382       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2383           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2384         // 64-bit
 2385         int offset = ra_->reg2offset(dst_first);
 2386         if (masm) {
 2387           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2388 #ifndef PRODUCT
 2389         } else {
 2390           st->print("movsd   [rsp + #%d], %s\t# spill",
 2391                      offset,
 2392                      Matcher::regName[src_first]);
 2393 #endif
 2394         }
 2395       } else {
 2396         // 32-bit
 2397         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2398         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2399         int offset = ra_->reg2offset(dst_first);
 2400         if (masm) {
 2401           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2402 #ifndef PRODUCT
 2403         } else {
 2404           st->print("movss   [rsp + #%d], %s\t# spill",
 2405                      offset,
 2406                      Matcher::regName[src_first]);
 2407 #endif
 2408         }
 2409       }
 2410       return 0;
 2411     } else if (dst_first_rc == rc_int) {
 2412       // xmm -> gpr
 2413       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2414           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2415         // 64-bit
 2416         if (masm) {
 2417           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2418 #ifndef PRODUCT
 2419         } else {
 2420           st->print("movdq   %s, %s\t# spill",
 2421                      Matcher::regName[dst_first],
 2422                      Matcher::regName[src_first]);
 2423 #endif
 2424         }
 2425       } else {
 2426         // 32-bit
 2427         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2428         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2429         if (masm) {
 2430           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2431 #ifndef PRODUCT
 2432         } else {
 2433           st->print("movdl   %s, %s\t# spill",
 2434                      Matcher::regName[dst_first],
 2435                      Matcher::regName[src_first]);
 2436 #endif
 2437         }
 2438       }
 2439       return 0;
 2440     } else if (dst_first_rc == rc_float) {
 2441       // xmm -> xmm
 2442       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2443           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2444         // 64-bit
 2445         if (masm) {
 2446           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2447 #ifndef PRODUCT
 2448         } else {
 2449           st->print("%s  %s, %s\t# spill",
 2450                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2451                      Matcher::regName[dst_first],
 2452                      Matcher::regName[src_first]);
 2453 #endif
 2454         }
 2455       } else {
 2456         // 32-bit
 2457         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2458         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2459         if (masm) {
 2460           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2461 #ifndef PRODUCT
 2462         } else {
 2463           st->print("%s  %s, %s\t# spill",
 2464                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2465                      Matcher::regName[dst_first],
 2466                      Matcher::regName[src_first]);
 2467 #endif
 2468         }
 2469       }
 2470       return 0;
 2471     } else if (dst_first_rc == rc_kreg) {
 2472       assert(false, "Illegal spilling");
 2473       return 0;
 2474     }
 2475   } else if (src_first_rc == rc_kreg) {
 2476     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2478       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2479           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2480         // 64-bit
 2481         int offset = ra_->reg2offset(dst_first);
 2482         if (masm) {
 2483           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2484 #ifndef PRODUCT
 2485         } else {
 2486           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2487                      offset,
 2488                      Matcher::regName[src_first]);
 2489 #endif
 2490         }
 2491       }
 2492       return 0;
 2493     } else if (dst_first_rc == rc_int) {
 2494       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2495           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2496         // 64-bit
 2497         if (masm) {
 2498           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2499 #ifndef PRODUCT
 2500         } else {
 2501          st->print("kmovq   %s, %s\t# spill",
 2502                      Matcher::regName[dst_first],
 2503                      Matcher::regName[src_first]);
 2504 #endif
 2505         }
 2506       }
 2507       Unimplemented();
 2508       return 0;
 2509     } else if (dst_first_rc == rc_kreg) {
 2510       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2511           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2512         // 64-bit
 2513         if (masm) {
 2514           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2515 #ifndef PRODUCT
 2516         } else {
 2517          st->print("kmovq   %s, %s\t# spill",
 2518                      Matcher::regName[dst_first],
 2519                      Matcher::regName[src_first]);
 2520 #endif
 2521         }
 2522       }
 2523       return 0;
 2524     } else if (dst_first_rc == rc_float) {
 2525       assert(false, "Illegal spill");
 2526       return 0;
 2527     }
 2528   }
 2529 
  assert(false, "unhandled spill combination");
 2531   Unimplemented();
 2532   return 0;
 2533 }
 2534 
 2535 #ifndef PRODUCT
 2536 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2537   implementation(nullptr, ra_, false, st);
 2538 }
 2539 #endif
 2540 
 2541 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2542   implementation(masm, ra_, false, nullptr);
 2543 }
 2544 
 2545 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
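  // Fall back to MachNode::size(), which measures the copy by emitting it
  // into a scratch buffer.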
 2546   return MachNode::size(ra_);
 2547 }
 2548 
 2549 //=============================================================================
 2550 #ifndef PRODUCT
 2551 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2552 {
 2553   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2554   int reg = ra_->get_reg_first(this);
 2555   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2556             Matcher::regName[reg], offset);
 2557 }
 2558 #endif
 2559 
 2560 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2561 {
 2562   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2563   int reg = ra_->get_encode(this);
 2564 
 2565   __ lea(as_Register(reg), Address(rsp, offset));
 2566 }
 2567 
 2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2569 {
 2570   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
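  // leaq reg, [rsp + offset] encodes as prefix + opcode + ModRM + SIB + disp:
  // a 1-byte REX prefix gives 5 bytes with disp8 and 8 bytes with disp32,
  // while the 2-byte REX2 prefix (needed for encodings above 15) gives 6 and 9.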
 2571   if (ra_->get_encode(this) > 15) {
 2572     return (offset < 0x80) ? 6 : 9; // REX2
 2573   } else {
 2574     return (offset < 0x80) ? 5 : 8; // REX
 2575   }
 2576 }
 2577 
 2578 //=============================================================================
 2579 #ifndef PRODUCT
 2580 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2581 {
 2582   st->print_cr("MachVEPNode");
 2583 }
 2584 #endif
 2585 
 2586 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2587 {
 2588   CodeBuffer* cbuf = masm->code();
 2589   uint insts_size = cbuf->insts_size();
 2590   if (!_verified) {
 2591     __ ic_check(1);
 2592   } else {
 2593     // TODO 8284443 Avoid creation of temporary frame
 2594     if (ra_->C->stub_function() == nullptr) {
 2595       __ verified_entry(ra_->C, 0);
 2596       __ entry_barrier();
 2597       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2598       __ remove_frame(initial_framesize, false);
 2599     }
 2600     // Unpack inline type args passed as oop and then jump to
 2601     // the verified entry point (skipping the unverified entry).
 2602     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2603     // Emit code for verified entry and save increment for stack repair on return
 2604     __ verified_entry(ra_->C, sp_inc);
 2605     if (Compile::current()->output()->in_scratch_emit_size()) {
 2606       Label dummy_verified_entry;
 2607       __ jmp(dummy_verified_entry);
 2608     } else {
 2609       __ jmp(*_verified_entry);
 2610     }
 2611   }
  /* WARNING these NOPs are critical so that the verified entry point is properly
     4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2614   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2615   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2616   if (nops_cnt > 0) {
 2617     __ nop(nops_cnt);
 2618   }
 2619 }
 2620 
 2621 //=============================================================================
 2622 #ifndef PRODUCT
 2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2624 {
 2625   if (UseCompressedClassPointers) {
 2626     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2627     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2628   } else {
 2629     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2630     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2631   }
 2632   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2633 }
 2634 #endif
 2635 
 2636 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2637 {
 2638   __ ic_check(InteriorEntryAlignment);
 2639 }
 2640 
 2641 
 2642 //=============================================================================
 2643 
 2644 bool Matcher::supports_vector_calling_convention(void) {
 2645   return EnableVectorSupport;
 2646 }
 2647 
 2648 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2649   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2650 }
 2651 
 2652 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2653   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2654 }
 2655 
 2656 #ifdef ASSERT
 2657 static bool is_ndd_demotable(const MachNode* mdef) {
 2658   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2659 }
 2660 #endif
 2661 
 2662 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2663                                             int oper_index) {
 2664   if (mdef == nullptr) {
 2665     return false;
 2666   }
 2667 
 2668   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2669       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2670     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2671     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2672     return false;
 2673   }
 2674 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
 2678   if (mdef->operand_num_edges(oper_index) != 1) {
 2679     return false;
 2680   }
 2681 
 2682   // Demotion candidate must be register mask compatible with definition.
 2683   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2684   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2685     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2686     return false;
 2687   }
 2688 
 2689   switch (oper_index) {
  // The first operand of a MachNode corresponding to an Intel APX NDD
  // selection pattern can share its assigned register with the definition
  // operand if their live ranges do not overlap. In such a scenario we can
  // demote the instruction to a legacy map0/map1 encoding by replacing its
  // 4-byte extended EVEX prefix with a shorter REX/REX2 encoding. Demotion
  // candidates are decorated with a special flag by the instruction selector.
 2696   case 1:
 2697     return is_ndd_demotable_opr1(mdef);
 2698 
  // The definition operand of a commutative operation can be biased towards
  // its second operand.
 2701   case 2:
 2702     return is_ndd_demotable_opr2(mdef);
 2703 
  // The current scheme selects at most two biasing candidates.
 2705   default:
 2706     assert(false, "unhandled operand index: %s", mdef->Name());
 2707     break;
 2708   }
 2709 
 2710   return false;
 2711 }
 2712 
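// Vector values are returned in XMM0; the returned register pair spans from
// the low slot of XMM0 up to the slot corresponding to the ideal vector width.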
 2713 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2714   assert(EnableVectorSupport, "sanity");
 2715   int lo = XMM0_num;
 2716   int hi = XMM0b_num;
 2717   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2718   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2719   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2720   return OptoRegPair(hi, lo);
 2721 }
 2722 
 2723 // Is this branch offset short enough that a short branch can be used?
 2724 //
 2725 // NOTE: If the platform does not provide any short branch variants, then
 2726 //       this method should return false for offset 0.
 2727 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 2731   offset -= br_size;
 2732 
  // The short version of jmpConUCF2 contains multiple branches,
  // making its reach slightly shorter.
 2735   if (rule == jmpConUCF2_rule)
 2736     return (-126 <= offset && offset <= 125);
 2737   return (-128 <= offset && offset <= 127);
 2738 }
 2739 
 2740 // Return whether or not this register is ever used as an argument.
 2741 // This function is used on startup to build the trampoline stubs in
 2742 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
 2745 bool Matcher::can_be_java_arg(int reg)
 2746 {
 2747   return
 2748     reg ==  RDI_num || reg == RDI_H_num ||
 2749     reg ==  RSI_num || reg == RSI_H_num ||
 2750     reg ==  RDX_num || reg == RDX_H_num ||
 2751     reg ==  RCX_num || reg == RCX_H_num ||
 2752     reg ==   R8_num || reg ==  R8_H_num ||
 2753     reg ==   R9_num || reg ==  R9_H_num ||
 2754     reg ==  R12_num || reg == R12_H_num ||
 2755     reg == XMM0_num || reg == XMM0b_num ||
 2756     reg == XMM1_num || reg == XMM1b_num ||
 2757     reg == XMM2_num || reg == XMM2b_num ||
 2758     reg == XMM3_num || reg == XMM3b_num ||
 2759     reg == XMM4_num || reg == XMM4b_num ||
 2760     reg == XMM5_num || reg == XMM5b_num ||
 2761     reg == XMM6_num || reg == XMM6b_num ||
 2762     reg == XMM7_num || reg == XMM7b_num;
 2763 }
 2764 
 2765 bool Matcher::is_spillable_arg(int reg)
 2766 {
 2767   return can_be_java_arg(reg);
 2768 }
 2769 
 2770 uint Matcher::int_pressure_limit()
 2771 {
 2772   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2773 }
 2774 
 2775 uint Matcher::float_pressure_limit()
 2776 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2779   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2780   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2781   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2782 }
 2783 
 2784 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses multiplication when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
 2788   return false;
 2789 }
 2790 
 2791 // Register for DIVI projection of divmodI
 2792 const RegMask& Matcher::divI_proj_mask() {
 2793   return INT_RAX_REG_mask();
 2794 }
 2795 
 2796 // Register for MODI projection of divmodI
 2797 const RegMask& Matcher::modI_proj_mask() {
 2798   return INT_RDX_REG_mask();
 2799 }
 2800 
 2801 // Register for DIVL projection of divmodL
 2802 const RegMask& Matcher::divL_proj_mask() {
 2803   return LONG_RAX_REG_mask();
 2804 }
 2805 
 2806 // Register for MODL projection of divmodL
 2807 const RegMask& Matcher::modL_proj_mask() {
 2808   return LONG_RDX_REG_mask();
 2809 }
 2810 
 2811 %}
 2812 
 2813 source_hpp %{
 2814 // Header information of the source block.
 2815 // Method declarations/definitions which are used outside
 2816 // the ad-scope can conveniently be defined here.
 2817 //
 2818 // To keep related declarations/definitions/uses close together,
 2819 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2820 
 2821 #include "runtime/vm_version.hpp"
 2822 
 2823 class NativeJump;
 2824 
 2825 class CallStubImpl {
 2826 
 2827   //--------------------------------------------------------------
 2828   //---<  Used for optimization in Compile::shorten_branches  >---
 2829   //--------------------------------------------------------------
 2830 
 2831  public:
 2832   // Size of call trampoline stub.
 2833   static uint size_call_trampoline() {
 2834     return 0; // no call trampolines on this platform
 2835   }
 2836 
 2837   // number of relocations needed by a call trampoline stub
 2838   static uint reloc_call_trampoline() {
 2839     return 0; // no call trampolines on this platform
 2840   }
 2841 };
 2842 
 2843 class HandlerImpl {
 2844 
 2845  public:
 2846 
 2847   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2848 
 2849   static uint size_deopt_handler() {
 2850     // one call and one jmp.
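    // Assuming the deopt blob is reachable from the code cache, the call rel32
    // is 5 bytes and the backward short jmp rel8 is 2 bytes.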
 2851     return 7;
 2852   }
 2853 };
 2854 
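// Map a vector length in bytes to the AVX vector length encoding used when
// emitting VEX/EVEX-prefixed instructions; lengths below 16 bytes still use
// the 128-bit encoding.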
 2855 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2856   switch(bytes) {
 2857     case  4: // fall-through
 2858     case  8: // fall-through
 2859     case 16: return Assembler::AVX_128bit;
 2860     case 32: return Assembler::AVX_256bit;
 2861     case 64: return Assembler::AVX_512bit;
 2862 
 2863     default: {
 2864       ShouldNotReachHere();
 2865       return Assembler::AVX_NoVec;
 2866     }
 2867   }
 2868 }
 2869 
 2870 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2871   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2872 }
 2873 
 2874 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2875   uint def_idx = use->operand_index(opnd);
 2876   Node* def = use->in(def_idx);
 2877   return vector_length_encoding(def);
 2878 }
 2879 
 2880 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2881   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2882          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2883 }
 2884 
 2885 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2886   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2887            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2888 }
 2889 
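// Platform-dependent node flags. They extend Node's flag space and record,
// for example, which condition codes an instruction sets or clears and
// whether an APX NDD instruction is a candidate for demotion to a legacy
// REX/REX2 encoding.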
 2890 class Node::PD {
 2891 public:
 2892   enum NodeFlags : uint64_t {
 2893     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2894     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2895     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2896     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2897     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2898     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2899     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2900     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2901     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2902     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2903     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2904     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2905     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2906     _last_flag                = Flag_ndd_demotable_opr2
 2907   };
 2908 };
 2909 
 2910 %} // end source_hpp
 2911 
 2912 source %{
 2913 
 2914 #include "opto/addnode.hpp"
 2915 #include "c2_intelJccErratum_x86.hpp"
 2916 
 2917 void PhaseOutput::pd_perform_mach_node_analysis() {
 2918   if (VM_Version::has_intel_jcc_erratum()) {
 2919     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2920     _buf_sizes._code += extra_padding;
 2921   }
 2922 }
 2923 
 2924 int MachNode::pd_alignment_required() const {
 2925   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2926     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2927     return IntelJccErratum::largest_jcc_size() + 1;
 2928   } else {
 2929     return 1;
 2930   }
 2931 }
 2932 
 2933 int MachNode::compute_padding(int current_offset) const {
 2934   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2935     Compile* C = Compile::current();
 2936     PhaseOutput* output = C->output();
 2937     Block* block = output->block();
 2938     int index = output->index();
 2939     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2940   } else {
 2941     return 0;
 2942   }
 2943 }
 2944 
 2945 // Emit deopt handler code.
 2946 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2947 
 2948   // Note that the code buffer's insts_mark is always relative to insts.
 2949   // That's why we must use the macroassembler to generate a handler.
 2950   address base = __ start_a_stub(size_deopt_handler());
 2951   if (base == nullptr) {
 2952     ciEnv::current()->record_failure("CodeCache is full");
 2953     return 0;  // CodeBuffer::expand failed
 2954   }
 2955   int offset = __ offset();
 2956 
 2957   Label start;
 2958   __ bind(start);
 2959 
 2960   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2961 
 2962   int entry_offset = __ offset();
 2963 
 2964   __ jmp(start);
 2965 
 2966   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2967   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2968          "out of bounds read in post-call NOP check");
 2969   __ end_a_stub();
 2970   return entry_offset;
 2971 }
 2972 
 2973 static Assembler::Width widthForType(BasicType bt) {
 2974   if (bt == T_BYTE) {
 2975     return Assembler::B;
 2976   } else if (bt == T_SHORT) {
 2977     return Assembler::W;
 2978   } else if (bt == T_INT) {
 2979     return Assembler::D;
 2980   } else {
 2981     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2982     return Assembler::Q;
 2983   }
 2984 }
 2985 
 2986 //=============================================================================
 2987 
 2988   // Float masks come from different places depending on platform.
 2989   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2990   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2991   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2992   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2993   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2994   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2995   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2996   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2997   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2998   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2999   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 3000   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 3001   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 3002   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 3003   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 3004   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 3005   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 3006   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 3007   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 3008 
 3009 //=============================================================================
 3010 bool Matcher::match_rule_supported(int opcode) {
 3011   if (!has_match_rule(opcode)) {
 3012     return false; // no match rule present
 3013   }
 3014   switch (opcode) {
 3015     case Op_AbsVL:
 3016     case Op_StoreVectorScatter:
 3017       if (UseAVX < 3) {
 3018         return false;
 3019       }
 3020       break;
 3021     case Op_PopCountI:
 3022     case Op_PopCountL:
 3023       if (!UsePopCountInstruction) {
 3024         return false;
 3025       }
 3026       break;
 3027     case Op_PopCountVI:
 3028       if (UseAVX < 2) {
 3029         return false;
 3030       }
 3031       break;
 3032     case Op_CompressV:
 3033     case Op_ExpandV:
 3034     case Op_PopCountVL:
 3035       if (UseAVX < 2) {
 3036         return false;
 3037       }
 3038       break;
 3039     case Op_MulVI:
 3040       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3041         return false;
 3042       }
 3043       break;
 3044     case Op_MulVL:
 3045       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3046         return false;
 3047       }
 3048       break;
 3049     case Op_MulReductionVL:
 3050       if (VM_Version::supports_avx512dq() == false) {
 3051         return false;
 3052       }
 3053       break;
 3054     case Op_AbsVB:
 3055     case Op_AbsVS:
 3056     case Op_AbsVI:
 3057     case Op_AddReductionVI:
 3058     case Op_AndReductionV:
 3059     case Op_OrReductionV:
 3060     case Op_XorReductionV:
 3061       if (UseSSE < 3) { // requires at least SSSE3
 3062         return false;
 3063       }
 3064       break;
 3065     case Op_MaxHF:
 3066     case Op_MinHF:
 3067       if (!VM_Version::supports_avx512vlbw()) {
 3068         return false;
 3069       }  // fallthrough
 3070     case Op_AddHF:
 3071     case Op_DivHF:
 3072     case Op_FmaHF:
 3073     case Op_MulHF:
 3074     case Op_ReinterpretS2HF:
 3075     case Op_ReinterpretHF2S:
 3076     case Op_SubHF:
 3077     case Op_SqrtHF:
 3078       if (!VM_Version::supports_avx512_fp16()) {
 3079         return false;
 3080       }
 3081       break;
 3082     case Op_VectorLoadShuffle:
 3083     case Op_VectorRearrange:
 3084     case Op_MulReductionVI:
 3085       if (UseSSE < 4) { // requires at least SSE4
 3086         return false;
 3087       }
 3088       break;
 3089     case Op_IsInfiniteF:
 3090     case Op_IsInfiniteD:
 3091       if (!VM_Version::supports_avx512dq()) {
 3092         return false;
 3093       }
 3094       break;
 3095     case Op_SqrtVD:
 3096     case Op_SqrtVF:
 3097     case Op_VectorMaskCmp:
 3098     case Op_VectorCastB2X:
 3099     case Op_VectorCastS2X:
 3100     case Op_VectorCastI2X:
 3101     case Op_VectorCastL2X:
 3102     case Op_VectorCastF2X:
 3103     case Op_VectorCastD2X:
 3104     case Op_VectorUCastB2X:
 3105     case Op_VectorUCastS2X:
 3106     case Op_VectorUCastI2X:
 3107     case Op_VectorMaskCast:
 3108       if (UseAVX < 1) { // enabled for AVX only
 3109         return false;
 3110       }
 3111       break;
 3112     case Op_PopulateIndex:
 3113       if (UseAVX < 2) {
 3114         return false;
 3115       }
 3116       break;
 3117     case Op_RoundVF:
 3118       if (UseAVX < 2) { // enabled for AVX2 only
 3119         return false;
 3120       }
 3121       break;
 3122     case Op_RoundVD:
 3123       if (UseAVX < 3) {
 3124         return false;  // enabled for AVX3 only
 3125       }
 3126       break;
 3127     case Op_CompareAndSwapL:
 3128     case Op_CompareAndSwapP:
 3129       break;
 3130     case Op_StrIndexOf:
 3131       if (!UseSSE42Intrinsics) {
 3132         return false;
 3133       }
 3134       break;
 3135     case Op_StrIndexOfChar:
 3136       if (!UseSSE42Intrinsics) {
 3137         return false;
 3138       }
 3139       break;
 3140     case Op_OnSpinWait:
 3141       if (VM_Version::supports_on_spin_wait() == false) {
 3142         return false;
 3143       }
 3144       break;
 3145     case Op_MulVB:
 3146     case Op_LShiftVB:
 3147     case Op_RShiftVB:
 3148     case Op_URShiftVB:
 3149     case Op_VectorInsert:
 3150     case Op_VectorLoadMask:
 3151     case Op_VectorStoreMask:
 3152     case Op_VectorBlend:
 3153       if (UseSSE < 4) {
 3154         return false;
 3155       }
 3156       break;
 3157     case Op_MaxD:
 3158     case Op_MaxF:
 3159     case Op_MinD:
 3160     case Op_MinF:
 3161       if (UseAVX < 1) { // enabled for AVX only
 3162         return false;
 3163       }
 3164       break;
 3165     case Op_CacheWB:
 3166     case Op_CacheWBPreSync:
 3167     case Op_CacheWBPostSync:
 3168       if (!VM_Version::supports_data_cache_line_flush()) {
 3169         return false;
 3170       }
 3171       break;
 3172     case Op_ExtractB:
 3173     case Op_ExtractL:
 3174     case Op_ExtractI:
 3175     case Op_RoundDoubleMode:
 3176       if (UseSSE < 4) {
 3177         return false;
 3178       }
 3179       break;
 3180     case Op_RoundDoubleModeV:
 3181       if (VM_Version::supports_avx() == false) {
 3182         return false; // 128bit vroundpd is not available
 3183       }
 3184       break;
 3185     case Op_LoadVectorGather:
 3186     case Op_LoadVectorGatherMasked:
 3187       if (UseAVX < 2) {
 3188         return false;
 3189       }
 3190       break;
 3191     case Op_FmaF:
 3192     case Op_FmaD:
 3193     case Op_FmaVD:
 3194     case Op_FmaVF:
 3195       if (!UseFMA) {
 3196         return false;
 3197       }
 3198       break;
 3199     case Op_MacroLogicV:
 3200       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3201         return false;
 3202       }
 3203       break;
 3204 
 3205     case Op_VectorCmpMasked:
 3206     case Op_VectorMaskGen:
 3207       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3208         return false;
 3209       }
 3210       break;
 3211     case Op_VectorMaskFirstTrue:
 3212     case Op_VectorMaskLastTrue:
 3213     case Op_VectorMaskTrueCount:
 3214     case Op_VectorMaskToLong:
 3215       if (UseAVX < 1) {
 3216          return false;
 3217       }
 3218       break;
 3219     case Op_RoundF:
 3220     case Op_RoundD:
 3221       break;
 3222     case Op_CopySignD:
 3223     case Op_CopySignF:
 3224       if (UseAVX < 3)  {
 3225         return false;
 3226       }
 3227       if (!VM_Version::supports_avx512vl()) {
 3228         return false;
 3229       }
 3230       break;
 3231     case Op_CompressBits:
 3232     case Op_ExpandBits:
 3233       if (!VM_Version::supports_bmi2()) {
 3234         return false;
 3235       }
 3236       break;
 3237     case Op_CompressM:
 3238       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3239         return false;
 3240       }
 3241       break;
 3242     case Op_ConvF2HF:
 3243     case Op_ConvHF2F:
 3244       if (!VM_Version::supports_float16()) {
 3245         return false;
 3246       }
 3247       break;
 3248     case Op_VectorCastF2HF:
 3249     case Op_VectorCastHF2F:
 3250       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3251         return false;
 3252       }
 3253       break;
 3254   }
 3255   return true;  // Match rules are supported by default.
 3256 }
 3257 
 3258 //------------------------------------------------------------------------
 3259 
 3260 static inline bool is_pop_count_instr_target(BasicType bt) {
 3261   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3262          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3263 }
 3264 
 3265 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3266   return match_rule_supported_vector(opcode, vlen, bt);
 3267 }
 3268 
// Identify extra cases in which we might want to provide match rules for vector nodes
// and other intrinsics, guarded by vector length (vlen) and element type (bt).
 3271 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3272   if (!match_rule_supported(opcode)) {
 3273     return false;
 3274   }
 3275   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3276   //   * SSE2 supports 128bit vectors for all types;
 3277   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3278   //   * AVX2 supports 256bit vectors for all types;
 3279   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3280   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3281   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3282   // And MaxVectorSize is taken into account as well.
 3283   if (!vector_size_supported(bt, vlen)) {
 3284     return false;
 3285   }
 3286   // Special cases which require vector length follow:
 3287   //   * implementation limitations
 3288   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3289   //   * 128bit vroundpd instruction is present only in AVX1
 3290   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3291   switch (opcode) {
 3292     case Op_MaxVHF:
 3293     case Op_MinVHF:
 3294       if (!VM_Version::supports_avx512bw()) {
 3295         return false;
 3296       }
 3297     case Op_AddVHF:
 3298     case Op_DivVHF:
 3299     case Op_FmaVHF:
 3300     case Op_MulVHF:
 3301     case Op_SubVHF:
 3302     case Op_SqrtVHF:
 3303       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3304         return false;
 3305       }
 3306       if (!VM_Version::supports_avx512_fp16()) {
 3307         return false;
 3308       }
 3309       break;
 3310     case Op_AbsVF:
 3311     case Op_NegVF:
 3312       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3313         return false; // 512bit vandps and vxorps are not available
 3314       }
 3315       break;
 3316     case Op_AbsVD:
 3317     case Op_NegVD:
 3318       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3319         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3320       }
 3321       break;
 3322     case Op_RotateRightV:
 3323     case Op_RotateLeftV:
 3324       if (bt != T_INT && bt != T_LONG) {
 3325         return false;
 3326       } // fallthrough
 3327     case Op_MacroLogicV:
 3328       if (!VM_Version::supports_evex() ||
 3329           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3330         return false;
 3331       }
 3332       break;
 3333     case Op_ClearArray:
 3334     case Op_VectorMaskGen:
 3335     case Op_VectorCmpMasked:
 3336       if (!VM_Version::supports_avx512bw()) {
 3337         return false;
 3338       }
 3339       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3340         return false;
 3341       }
 3342       break;
 3343     case Op_LoadVectorMasked:
 3344     case Op_StoreVectorMasked:
 3345       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3346         return false;
 3347       }
 3348       break;
 3349     case Op_UMinV:
 3350     case Op_UMaxV:
 3351       if (UseAVX == 0) {
 3352         return false;
 3353       }
 3354       break;
 3355     case Op_MaxV:
 3356     case Op_MinV:
 3357       if (UseSSE < 4 && is_integral_type(bt)) {
 3358         return false;
 3359       }
 3360       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3361           // Float/Double intrinsics are enabled for AVX family currently.
 3362           if (UseAVX == 0) {
 3363             return false;
 3364           }
 3365           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3366             return false;
 3367           }
 3368       }
 3369       break;
 3370     case Op_CallLeafVector:
 3371       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3372         return false;
 3373       }
 3374       break;
 3375     case Op_AddReductionVI:
 3376       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3377         return false;
 3378       }
 3379       // fallthrough
 3380     case Op_AndReductionV:
 3381     case Op_OrReductionV:
 3382     case Op_XorReductionV:
 3383       if (is_subword_type(bt) && (UseSSE < 4)) {
 3384         return false;
 3385       }
 3386       break;
 3387     case Op_MinReductionV:
 3388     case Op_MaxReductionV:
 3389       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3390         return false;
 3391       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3392         return false;
 3393       }
 3394       // Float/Double intrinsics enabled for AVX family.
 3395       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3396         return false;
 3397       }
 3398       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3399         return false;
 3400       }
 3401       break;
 3402     case Op_VectorBlend:
 3403       if (UseAVX == 0 && size_in_bits < 128) {
 3404         return false;
 3405       }
 3406       break;
 3407     case Op_VectorTest:
 3408       if (UseSSE < 4) {
 3409         return false; // Implementation limitation
 3410       } else if (size_in_bits < 32) {
 3411         return false; // Implementation limitation
 3412       }
 3413       break;
 3414     case Op_VectorLoadShuffle:
 3415     case Op_VectorRearrange:
      if (vlen == 2) {
 3417         return false; // Implementation limitation due to how shuffle is loaded
 3418       } else if (size_in_bits == 256 && UseAVX < 2) {
 3419         return false; // Implementation limitation
 3420       }
 3421       break;
 3422     case Op_VectorLoadMask:
 3423     case Op_VectorMaskCast:
 3424       if (size_in_bits == 256 && UseAVX < 2) {
 3425         return false; // Implementation limitation
 3426       }
 3427       // fallthrough
 3428     case Op_VectorStoreMask:
 3429       if (vlen == 2) {
 3430         return false; // Implementation limitation
 3431       }
 3432       break;
 3433     case Op_PopulateIndex:
 3434       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3435         return false;
 3436       }
 3437       break;
 3438     case Op_VectorCastB2X:
 3439     case Op_VectorCastS2X:
 3440     case Op_VectorCastI2X:
 3441       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3442         return false;
 3443       }
 3444       break;
 3445     case Op_VectorCastL2X:
 3446       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3447         return false;
 3448       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3449         return false;
 3450       }
 3451       break;
 3452     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3456         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3457         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3458           return false;
 3459         }
 3460       }
 3461       // fallthrough
 3462     case Op_VectorCastD2X:
 3463       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3464         return false;
 3465       }
 3466       break;
 3467     case Op_VectorCastF2HF:
 3468     case Op_VectorCastHF2F:
 3469       if (!VM_Version::supports_f16c() &&
 3470          ((!VM_Version::supports_evex() ||
 3471          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3472         return false;
 3473       }
 3474       break;
 3475     case Op_RoundVD:
 3476       if (!VM_Version::supports_avx512dq()) {
 3477         return false;
 3478       }
 3479       break;
 3480     case Op_MulReductionVI:
 3481       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3482         return false;
 3483       }
 3484       break;
 3485     case Op_LoadVectorGatherMasked:
 3486       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3487         return false;
 3488       }
 3489       if (is_subword_type(bt) &&
 3490          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3491           (size_in_bits < 64)                                      ||
 3492           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3493         return false;
 3494       }
 3495       break;
 3496     case Op_StoreVectorScatterMasked:
 3497     case Op_StoreVectorScatter:
 3498       if (is_subword_type(bt)) {
 3499         return false;
 3500       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3501         return false;
 3502       }
 3503       // fallthrough
 3504     case Op_LoadVectorGather:
 3505       if (!is_subword_type(bt) && size_in_bits == 64) {
 3506         return false;
 3507       }
 3508       if (is_subword_type(bt) && size_in_bits < 64) {
 3509         return false;
 3510       }
 3511       break;
 3512     case Op_SaturatingAddV:
 3513     case Op_SaturatingSubV:
 3514       if (UseAVX < 1) {
 3515         return false; // Implementation limitation
 3516       }
 3517       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3518         return false;
 3519       }
 3520       break;
 3521     case Op_SelectFromTwoVector:
 3522        if (size_in_bits < 128) {
 3523          return false;
 3524        }
 3525        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3526          return false;
 3527        }
 3528        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3529          return false;
 3530        }
 3531        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3532          return false;
 3533        }
 3534        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3535          return false;
 3536        }
 3537        break;
 3538     case Op_MaskAll:
 3539       if (!VM_Version::supports_evex()) {
 3540         return false;
 3541       }
 3542       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3543         return false;
 3544       }
 3545       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3546         return false;
 3547       }
 3548       break;
 3549     case Op_VectorMaskCmp:
 3550       if (vlen < 2 || size_in_bits < 32) {
 3551         return false;
 3552       }
 3553       break;
 3554     case Op_CompressM:
 3555       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3556         return false;
 3557       }
 3558       break;
 3559     case Op_CompressV:
 3560     case Op_ExpandV:
 3561       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3562         return false;
 3563       }
 3564       if (size_in_bits < 128 ) {
 3565         return false;
 3566       }
 3567     case Op_VectorLongToMask:
 3568       if (UseAVX < 1) {
 3569         return false;
 3570       }
 3571       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3572         return false;
 3573       }
 3574       break;
 3575     case Op_SignumVD:
 3576     case Op_SignumVF:
 3577       if (UseAVX < 1) {
 3578         return false;
 3579       }
 3580       break;
 3581     case Op_PopCountVI:
 3582     case Op_PopCountVL: {
 3583         if (!is_pop_count_instr_target(bt) &&
 3584             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3585           return false;
 3586         }
 3587       }
 3588       break;
 3589     case Op_ReverseV:
 3590     case Op_ReverseBytesV:
 3591       if (UseAVX < 2) {
 3592         return false;
 3593       }
 3594       break;
 3595     case Op_CountTrailingZerosV:
 3596     case Op_CountLeadingZerosV:
 3597       if (UseAVX < 2) {
 3598         return false;
 3599       }
 3600       break;
 3601   }
  return true;  // Match rules are supported by default.
 3603 }
 3604 
 3605 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most of the unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation
  // patterns by returning false for all opcodes apart from the ones whose
  // masked instruction patterns are defined in this file.
 3612   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3613     return false;
 3614   }
 3615 
 3616   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3617   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3618     return false;
 3619   }
 3620   switch(opcode) {
 3621     // Unary masked operations
 3622     case Op_AbsVB:
 3623     case Op_AbsVS:
 3624       if(!VM_Version::supports_avx512bw()) {
 3625         return false;  // Implementation limitation
 3626       }
 3627     case Op_AbsVI:
 3628     case Op_AbsVL:
 3629       return true;
 3630 
 3631     // Ternary masked operations
 3632     case Op_FmaVF:
 3633     case Op_FmaVD:
 3634       return true;
 3635 
 3636     case Op_MacroLogicV:
 3637       if(bt != T_INT && bt != T_LONG) {
 3638         return false;
 3639       }
 3640       return true;
 3641 
 3642     // Binary masked operations
 3643     case Op_AddVB:
 3644     case Op_AddVS:
 3645     case Op_SubVB:
 3646     case Op_SubVS:
 3647     case Op_MulVS:
 3648     case Op_LShiftVS:
 3649     case Op_RShiftVS:
 3650     case Op_URShiftVS:
 3651       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3652       if (!VM_Version::supports_avx512bw()) {
 3653         return false;  // Implementation limitation
 3654       }
 3655       return true;
 3656 
 3657     case Op_MulVL:
 3658       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3659       if (!VM_Version::supports_avx512dq()) {
 3660         return false;  // Implementation limitation
 3661       }
 3662       return true;
 3663 
 3664     case Op_AndV:
 3665     case Op_OrV:
 3666     case Op_XorV:
 3667     case Op_RotateRightV:
 3668     case Op_RotateLeftV:
 3669       if (bt != T_INT && bt != T_LONG) {
 3670         return false; // Implementation limitation
 3671       }
 3672       return true;
 3673 
 3674     case Op_VectorLoadMask:
 3675       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3676       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3677         return false;
 3678       }
 3679       return true;
 3680 
 3681     case Op_AddVI:
 3682     case Op_AddVL:
 3683     case Op_AddVF:
 3684     case Op_AddVD:
 3685     case Op_SubVI:
 3686     case Op_SubVL:
 3687     case Op_SubVF:
 3688     case Op_SubVD:
 3689     case Op_MulVI:
 3690     case Op_MulVF:
 3691     case Op_MulVD:
 3692     case Op_DivVF:
 3693     case Op_DivVD:
 3694     case Op_SqrtVF:
 3695     case Op_SqrtVD:
 3696     case Op_LShiftVI:
 3697     case Op_LShiftVL:
 3698     case Op_RShiftVI:
 3699     case Op_RShiftVL:
 3700     case Op_URShiftVI:
 3701     case Op_URShiftVL:
 3702     case Op_LoadVectorMasked:
 3703     case Op_StoreVectorMasked:
 3704     case Op_LoadVectorGatherMasked:
 3705     case Op_StoreVectorScatterMasked:
 3706       return true;
 3707 
 3708     case Op_UMinV:
 3709     case Op_UMaxV:
 3710       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3711         return false;
 3712       } // fallthrough
 3713     case Op_MaxV:
 3714     case Op_MinV:
 3715       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3719         return false; // Implementation limitation
 3720       }
 3721       return true;
 3722     case Op_SaturatingAddV:
 3723     case Op_SaturatingSubV:
 3724       if (!is_subword_type(bt)) {
 3725         return false;
 3726       }
 3727       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       return true;
 3731 
 3732     case Op_VectorMaskCmp:
 3733       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3734         return false; // Implementation limitation
 3735       }
 3736       return true;
 3737 
 3738     case Op_VectorRearrange:
 3739       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3740         return false; // Implementation limitation
 3741       }
 3742       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3743         return false; // Implementation limitation
 3744       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3745         return false; // Implementation limitation
 3746       }
 3747       return true;
 3748 
 3749     // Binary Logical operations
 3750     case Op_AndVMask:
 3751     case Op_OrVMask:
 3752     case Op_XorVMask:
 3753       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3754         return false; // Implementation limitation
 3755       }
 3756       return true;
 3757 
 3758     case Op_PopCountVI:
 3759     case Op_PopCountVL:
 3760       if (!is_pop_count_instr_target(bt)) {
 3761         return false;
 3762       }
 3763       return true;
 3764 
 3765     case Op_MaskAll:
 3766       return true;
 3767 
 3768     case Op_CountLeadingZerosV:
 3769       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3770         return true;
 3771       } // fallthrough
 3772     default:
 3773       return false;
 3774   }
 3775 }
 3776 
 3777 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3778   return false;
 3779 }
 3780 
 3781 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3782 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3783   switch (elem_bt) {
 3784     case T_BYTE:  return false;
 3785     case T_SHORT: return !VM_Version::supports_avx512bw();
 3786     case T_INT:   return !VM_Version::supports_avx();
 3787     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3788     default:
 3789       ShouldNotReachHere();
 3790       return false;
 3791   }
 3792 }
 3793 
 3794 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3795   // Prefer predicate if the mask type is "TypeVectMask".
 3796   return vt->isa_vectmask() != nullptr;
 3797 }
 3798 
 3799 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3800   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3801   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3802   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3803       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3804     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3805     return new legVecZOper();
 3806   }
 3807   if (legacy) {
 3808     switch (ideal_reg) {
 3809       case Op_VecS: return new legVecSOper();
 3810       case Op_VecD: return new legVecDOper();
 3811       case Op_VecX: return new legVecXOper();
 3812       case Op_VecY: return new legVecYOper();
 3813       case Op_VecZ: return new legVecZOper();
 3814     }
 3815   } else {
 3816     switch (ideal_reg) {
 3817       case Op_VecS: return new vecSOper();
 3818       case Op_VecD: return new vecDOper();
 3819       case Op_VecX: return new vecXOper();
 3820       case Op_VecY: return new vecYOper();
 3821       case Op_VecZ: return new vecZOper();
 3822     }
 3823   }
 3824   ShouldNotReachHere();
 3825   return nullptr;
 3826 }
 3827 
 3828 bool Matcher::is_reg2reg_move(MachNode* m) {
 3829   switch (m->rule()) {
 3830     case MoveVec2Leg_rule:
 3831     case MoveLeg2Vec_rule:
 3832     case MoveF2VL_rule:
 3833     case MoveF2LEG_rule:
 3834     case MoveVL2F_rule:
 3835     case MoveLEG2F_rule:
 3836     case MoveD2VL_rule:
 3837     case MoveD2LEG_rule:
 3838     case MoveVL2D_rule:
 3839     case MoveLEG2D_rule:
 3840       return true;
 3841     default:
 3842       return false;
 3843   }
 3844 }
 3845 
 3846 bool Matcher::is_generic_vector(MachOper* opnd) {
 3847   switch (opnd->opcode()) {
 3848     case VEC:
 3849     case LEGVEC:
 3850       return true;
 3851     default:
 3852       return false;
 3853   }
 3854 }
 3855 
 3856 //------------------------------------------------------------------------
 3857 
 3858 const RegMask* Matcher::predicate_reg_mask(void) {
 3859   return &_VECTMASK_REG_mask;
 3860 }
 3861 
 3862 // Max vector size in bytes. 0 if not supported.
 3863 int Matcher::vector_width_in_bytes(BasicType bt) {
 3864   assert(is_java_primitive(bt), "only primitive type vectors");
 3865   // SSE2 supports 128bit vectors for all types.
 3866   // AVX2 supports 256bit vectors for all types.
 3867   // AVX-512 (EVEX) supports 512bit vectors for all types.
 3868   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3869   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3870   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3871     size = (UseAVX > 2) ? 64 : 32;
 3872   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3873     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3874   // Use flag to limit vector size.
 3875   size = MIN2(size,(int)MaxVectorSize);
 3876   // Minimum 2 values in vector (or 4 for bytes).
 3877   switch (bt) {
 3878   case T_DOUBLE:
 3879   case T_LONG:
 3880     if (size < 16) return 0;
 3881     break;
 3882   case T_FLOAT:
 3883   case T_INT:
 3884     if (size < 8) return 0;
 3885     break;
 3886   case T_BOOLEAN:
 3887     if (size < 4) return 0;
 3888     break;
 3889   case T_CHAR:
 3890     if (size < 4) return 0;
 3891     break;
 3892   case T_BYTE:
 3893     if (size < 4) return 0;
 3894     break;
 3895   case T_SHORT:
 3896     if (size < 4) return 0;
 3897     break;
 3898   default:
 3899     ShouldNotReachHere();
 3900   }
 3901   return size;
 3902 }
 3903 
 3904 // Limits on vector size (number of elements) loaded into vector.
 3905 int Matcher::max_vector_size(const BasicType bt) {
 3906   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3907 }
 3908 int Matcher::min_vector_size(const BasicType bt) {
 3909   int max_size = max_vector_size(bt);
 3910   // Min size which can be loaded into vector is 4 bytes.
 3911   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3912   // Support for calling svml double64 vectors
 3913   if (bt == T_DOUBLE) {
 3914     size = 1;
 3915   }
 3916   return MIN2(size,max_size);
 3917 }
 3918 
 3919 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3920   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3921   // by default on Cascade Lake
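        // (512-bit instructions can lower the core frequency on these parts, which can
        // outweigh the benefit of the wider vectors for typical auto-vectorized loops.)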
 3922   if (VM_Version::is_default_intel_cascade_lake()) {
 3923     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3924   }
 3925   return Matcher::max_vector_size(bt);
 3926 }
 3927 
 3928 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3929   return -1;
 3930 }
 3931 
 3932 // Vector ideal reg corresponding to specified size in bytes
 3933 uint Matcher::vector_ideal_reg(int size) {
 3934   assert(MaxVectorSize >= size, "");
 3935   switch(size) {
 3936     case  4: return Op_VecS;
 3937     case  8: return Op_VecD;
 3938     case 16: return Op_VecX;
 3939     case 32: return Op_VecY;
 3940     case 64: return Op_VecZ;
 3941   }
 3942   ShouldNotReachHere();
 3943   return 0;
 3944 }
 3945 
 3946 // Check for shift by small constant as well
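      // For example, for an address like (AddP base (LShiftL idx 2)) the shift can be
      // subsumed into the [base + idx*4] addressing mode, so it is cloned next to its
      // address uses instead of being computed into a separate register.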
 3947 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3948   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3949       shift->in(2)->get_int() <= 3 &&
 3950       // Are there other uses besides address expressions?
 3951       !matcher->is_visited(shift)) {
 3952     address_visited.set(shift->_idx); // Flag as address_visited
 3953     mstack.push(shift->in(2), Matcher::Visit);
 3954     Node *conv = shift->in(1);
 3955     // Allow the Matcher to match the rule that bypasses the
 3956     // ConvI2L operation for an array index on LP64
 3957     // if the index value is positive.
 3958     if (conv->Opcode() == Op_ConvI2L &&
 3959         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3960         // Are there other uses besides address expressions?
 3961         !matcher->is_visited(conv)) {
 3962       address_visited.set(conv->_idx); // Flag as address_visited
 3963       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3964     } else {
 3965       mstack.push(conv, Matcher::Pre_Visit);
 3966     }
 3967     return true;
 3968   }
 3969   return false;
 3970 }
 3971 
 3972 // This function identifies sub-graphs in which a 'load' node is
 3973 // input to two different nodes, and such that it can be matched
 3974 // with BMI instructions like blsi, blsr, etc.
 3975 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3976 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3977 // refers to the same node.
 3978 //
 3979 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3980 // This is a temporary solution until we make DAGs expressible in ADL.
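      // For reference, the shapes matched by is_bmi_pattern() below correspond to the
      // BMI1 idioms (where x is a memory value used only inside the pattern):
      //   x & -x       -> blsi   (extract lowest set bit)
      //   x & (x - 1)  -> blsr   (reset lowest set bit)
      //   x ^ (x - 1)  -> blsmsk (mask up to and including lowest set bit)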
 3981 template<typename ConType>
 3982 class FusedPatternMatcher {
 3983   Node* _op1_node;
 3984   Node* _mop_node;
 3985   int _con_op;
 3986 
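        // Returns the input index (1 or 2) of 'n' whose opcode matches 'next_op', or -1
        // if there is none. A 'next_op_idx' of -1 means the operation is commutative,
        // so both input positions are tried.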
 3987   static int match_next(Node* n, int next_op, int next_op_idx) {
 3988     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3989       return -1;
 3990     }
 3991 
 3992     if (next_op_idx == -1) { // n is commutative, try rotations
 3993       if (n->in(1)->Opcode() == next_op) {
 3994         return 1;
 3995       } else if (n->in(2)->Opcode() == next_op) {
 3996         return 2;
 3997       }
 3998     } else {
 3999       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 4000       if (n->in(next_op_idx)->Opcode() == next_op) {
 4001         return next_op_idx;
 4002       }
 4003     }
 4004     return -1;
 4005   }
 4006 
 4007  public:
 4008   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4009     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4010 
 4011   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4012              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4013              typename ConType::NativeType con_value) {
 4014     if (_op1_node->Opcode() != op1) {
 4015       return false;
 4016     }
 4017     if (_mop_node->outcnt() > 2) {
 4018       return false;
 4019     }
 4020     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4021     if (op1_op2_idx == -1) {
 4022       return false;
 4023     }
 4024     // Memory operation must be the other edge
 4025     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4026 
 4027     // Check that the mop node is really what we want
 4028     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4029       Node* op2_node = _op1_node->in(op1_op2_idx);
 4030       if (op2_node->outcnt() > 1) {
 4031         return false;
 4032       }
 4033       assert(op2_node->Opcode() == op2, "Should be");
 4034       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4035       if (op2_con_idx == -1) {
 4036         return false;
 4037       }
 4038       // Memory operation must be the other edge
 4039       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4040       // Check that the memory operation is the same node
 4041       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4042         // Now check the constant
 4043         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4044         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4045           return true;
 4046         }
 4047       }
 4048     }
 4049     return false;
 4050   }
 4051 };
 4052 
 4053 static bool is_bmi_pattern(Node* n, Node* m) {
 4054   assert(UseBMI1Instructions, "sanity");
 4055   if (n != nullptr && m != nullptr) {
 4056     if (m->Opcode() == Op_LoadI) {
 4057       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4058       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4059              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4060              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4061     } else if (m->Opcode() == Op_LoadL) {
 4062       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4063       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4064              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4065              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4066     }
 4067   }
 4068   return false;
 4069 }
 4070 
 4071 // Should the matcher clone input 'm' of node 'n'?
 4072 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4073   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4074   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4075     mstack.push(m, Visit);
 4076     return true;
 4077   }
 4078   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4079     mstack.push(m, Visit);           // m = ShiftCntV
 4080     return true;
 4081   }
 4082   if (is_encode_and_store_pattern(n, m)) {
 4083     mstack.push(m, Visit);
 4084     return true;
 4085   }
 4086   return false;
 4087 }
 4088 
 4089 // Should the Matcher clone shifts on addressing modes, expecting them
 4090 // to be subsumed into complex addressing expressions or compute them
 4091 // into registers?
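      // x86 addressing modes can encode [base + index*scale + disp32] directly, so small
      // constant offsets and scaled indices are cloned into the address expression here
      // rather than being computed into separate registers.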
 4092 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4093   Node *off = m->in(AddPNode::Offset);
 4094   if (off->is_Con()) {
 4095     address_visited.test_set(m->_idx); // Flag as address_visited
 4096     Node *adr = m->in(AddPNode::Address);
 4097 
 4098     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4099     // AtomicAdd is not an addressing expression.
 4100     // Cheap to find it by looking for screwy base.
 4101     if (adr->is_AddP() &&
 4102         !adr->in(AddPNode::Base)->is_top() &&
 4103         !adr->in(AddPNode::Offset)->is_Con() &&
 4104         off->get_long() == (int) (off->get_long()) && // immL32
 4105         // Are there other uses besides address expressions?
 4106         !is_visited(adr)) {
 4107       address_visited.set(adr->_idx); // Flag as address_visited
 4108       Node *shift = adr->in(AddPNode::Offset);
 4109       if (!clone_shift(shift, this, mstack, address_visited)) {
 4110         mstack.push(shift, Pre_Visit);
 4111       }
 4112       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4113       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4114     } else {
 4115       mstack.push(adr, Pre_Visit);
 4116     }
 4117 
 4118     // Clone X+offset as it also folds into most addressing expressions
 4119     mstack.push(off, Visit);
 4120     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4121     return true;
 4122   } else if (clone_shift(off, this, mstack, address_visited)) {
 4123     address_visited.test_set(m->_idx); // Flag as address_visited
 4124     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4125     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4126     return true;
 4127   }
 4128   return false;
 4129 }
 4130 
 4131 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4132   switch (bt) {
 4133     case BoolTest::eq:
 4134       return Assembler::eq;
 4135     case BoolTest::ne:
 4136       return Assembler::neq;
 4137     case BoolTest::le:
 4138     case BoolTest::ule:
 4139       return Assembler::le;
 4140     case BoolTest::ge:
 4141     case BoolTest::uge:
 4142       return Assembler::nlt;
 4143     case BoolTest::lt:
 4144     case BoolTest::ult:
 4145       return Assembler::lt;
 4146     case BoolTest::gt:
 4147     case BoolTest::ugt:
 4148       return Assembler::nle;
 4149     default : ShouldNotReachHere(); return Assembler::_false;
 4150   }
 4151 }
 4152 
 4153 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4154   switch (bt) {
 4155   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4156   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4157   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4158   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4159   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4160   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4161   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4162   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4163   }
 4164 }
 4165 
 4166 // Helper methods for MachSpillCopyNode::implementation().
 4167 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4168                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4169   assert(ireg == Op_VecS || // 32bit vector
 4170          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4171           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4172          "no non-adjacent vector moves" );
 4173   if (masm) {
 4174     switch (ireg) {
 4175     case Op_VecS: // copy whole register
 4176     case Op_VecD:
 4177     case Op_VecX:
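            // Without AVX512VL, the legacy SSE/VEX move encodings cannot address XMM16-31,
            // so use an EVEX-encoded 512-bit extract of lane 0 instead.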
 4178       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4179         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4180       } else {
 4181         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4182       }
 4183       break;
 4184     case Op_VecY:
 4185       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4186         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4187       } else {
 4188         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4189       }
 4190       break;
 4191     case Op_VecZ:
 4192       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4193       break;
 4194     default:
 4195       ShouldNotReachHere();
 4196     }
 4197 #ifndef PRODUCT
 4198   } else {
 4199     switch (ireg) {
 4200     case Op_VecS:
 4201     case Op_VecD:
 4202     case Op_VecX:
 4203       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4204       break;
 4205     case Op_VecY:
 4206     case Op_VecZ:
 4207       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4208       break;
 4209     default:
 4210       ShouldNotReachHere();
 4211     }
 4212 #endif
 4213   }
 4214 }
 4215 
 4216 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4217                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4218   if (masm) {
 4219     if (is_load) {
 4220       switch (ireg) {
 4221       case Op_VecS:
 4222         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4223         break;
 4224       case Op_VecD:
 4225         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4226         break;
 4227       case Op_VecX:
 4228         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4229           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4230         } else {
 4231           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4232           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4233         }
 4234         break;
 4235       case Op_VecY:
 4236         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4237           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4238         } else {
 4239           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4240           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4241         }
 4242         break;
 4243       case Op_VecZ:
 4244         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4245         break;
 4246       default:
 4247         ShouldNotReachHere();
 4248       }
 4249     } else { // store
 4250       switch (ireg) {
 4251       case Op_VecS:
 4252         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4253         break;
 4254       case Op_VecD:
 4255         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4256         break;
 4257       case Op_VecX:
 4258         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4259           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4260         }
 4261         else {
 4262           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4263         }
 4264         break;
 4265       case Op_VecY:
 4266         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4267           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4268         }
 4269         else {
 4270           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4271         }
 4272         break;
 4273       case Op_VecZ:
 4274         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4275         break;
 4276       default:
 4277         ShouldNotReachHere();
 4278       }
 4279     }
 4280 #ifndef PRODUCT
 4281   } else {
 4282     if (is_load) {
 4283       switch (ireg) {
 4284       case Op_VecS:
 4285         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4286         break;
 4287       case Op_VecD:
 4288         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4289         break;
 4290        case Op_VecX:
 4291         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4292         break;
 4293       case Op_VecY:
 4294       case Op_VecZ:
 4295         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4296         break;
 4297       default:
 4298         ShouldNotReachHere();
 4299       }
 4300     } else { // store
 4301       switch (ireg) {
 4302       case Op_VecS:
 4303         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4304         break;
 4305       case Op_VecD:
 4306         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4307         break;
 4308        case Op_VecX:
 4309         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4310         break;
 4311       case Op_VecY:
 4312       case Op_VecZ:
 4313         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4314         break;
 4315       default:
 4316         ShouldNotReachHere();
 4317       }
 4318     }
 4319 #endif
 4320   }
 4321 }
 4322 
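      // Builds the in-memory byte image of a vector constant by copying the scalar 'con'
      // into each of the 'len' elements of type 'bt'.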
 4323 template <class T>
 4324 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4325   int size = type2aelembytes(bt) * len;
 4326   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4327   for (int i = 0; i < len; i++) {
 4328     int offset = i * type2aelembytes(bt);
 4329     switch (bt) {
 4330       case T_BYTE: val->at(i) = con; break;
 4331       case T_SHORT: {
 4332         jshort c = con;
 4333         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4334         break;
 4335       }
 4336       case T_INT: {
 4337         jint c = con;
 4338         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4339         break;
 4340       }
 4341       case T_LONG: {
 4342         jlong c = con;
 4343         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4344         break;
 4345       }
 4346       case T_FLOAT: {
 4347         jfloat c = con;
 4348         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4349         break;
 4350       }
 4351       case T_DOUBLE: {
 4352         jdouble c = con;
 4353         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4354         break;
 4355       }
 4356       default: assert(false, "%s", type2name(bt));
 4357     }
 4358   }
 4359   return val;
 4360 }
 4361 
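      // Returns a 64-bit pattern with only the sign bit of each 'bt'-sized lane set
      // (e.g. 0x8080808080808080 for T_BYTE), replicated across the whole word.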
 4362 static inline jlong high_bit_set(BasicType bt) {
 4363   switch (bt) {
 4364     case T_BYTE:  return 0x8080808080808080;
 4365     case T_SHORT: return 0x8000800080008000;
 4366     case T_INT:   return 0x8000000080000000;
 4367     case T_LONG:  return 0x8000000000000000;
 4368     default:
 4369       ShouldNotReachHere();
 4370       return 0;
 4371   }
 4372 }
 4373 
 4374 #ifndef PRODUCT
 4375   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4376     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4377   }
 4378 #endif
 4379 
 4380   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4381     __ nop(_count);
 4382   }
 4383 
 4384   uint MachNopNode::size(PhaseRegAlloc*) const {
 4385     return _count;
 4386   }
 4387 
 4388 #ifndef PRODUCT
 4389   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4390     st->print("# breakpoint");
 4391   }
 4392 #endif
 4393 
 4394   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4395     __ int3();
 4396   }
 4397 
 4398   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4399     return MachNode::size(ra_);
 4400   }
 4401 
 4402 %}
 4403 
 4404 //----------ENCODING BLOCK-----------------------------------------------------
 4405 // This block specifies the encoding classes used by the compiler to
 4406 // output byte streams.  Encoding classes are parameterized macros
 4407 // used by Machine Instruction Nodes in order to generate the bit
 4408 // encoding of the instruction.  Operands specify their base encoding
 4409 // interface with the interface keyword.  There are currently four
 4410 // supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
 4411 // COND_INTER.  REG_INTER causes an operand to generate a function
 4412 // which returns its register number when queried.  CONST_INTER causes
 4413 // an operand to generate a function which returns the value of the
 4414 // constant when queried.  MEMORY_INTER causes an operand to generate
 4415 // four functions which return the Base Register, the Index Register,
 4416 // the Scale Value, and the Offset Value of the operand when queried.
 4417 // COND_INTER causes an operand to generate six functions which return
 4418 // the encoding code (ie - encoding bits for the instruction)
 4419 // associated with each basic boolean condition for a conditional
 4420 // instruction.
 4421 //
 4422 // Instructions specify two basic values for encoding.  Again, a
 4423 // function is available to check if the constant displacement is an
 4424 // oop. They use the ins_encode keyword to specify their encoding
 4425 // classes (which must be a sequence of enc_class names, and their
 4426 // parameters, specified in the encoding block), and they use the
 4427 // opcode keyword to specify, in order, their primary, secondary, and
 4428 // tertiary opcode.  Only the opcode sections which a particular
 4429 // instruction needs for encoding need to be specified.
 4430 encode %{
 4431   enc_class cdql_enc(no_rax_rdx_RegI div)
 4432   %{
 4433     // Full implementation of Java idiv and irem; checks for
 4434     // special case as described in JVM spec., p.243 & p.271.
 4435     //
 4436     //         normal case                           special case
 4437     //
 4438     // input : rax: dividend                         min_int
 4439     //         reg: divisor                          -1
 4440     //
 4441     // output: rax: quotient  (= rax idiv reg)       min_int
 4442     //         rdx: remainder (= rax irem reg)       0
 4443     //
 4444     //  Code sequence:
 4445     //
 4446     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4447     //    5:   75 07/08                jne    e <normal>
 4448     //    7:   33 d2                   xor    %edx,%edx
 4449     //  [div >= 8 -> offset + 1]
 4450     //  [REX_B]
 4451     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4452     //    c:   74 03/04                je     11 <done>
 4453     // 000000000000000e <normal>:
 4454     //    e:   99                      cltd
 4455     //  [div >= 8 -> offset + 1]
 4456     //  [REX_B]
 4457     //    f:   f7 f9                   idiv   $div
 4458     // 0000000000000011 <done>:
 4459     Label normal;
 4460     Label done;
 4461 
 4462     // cmp    $0x80000000,%eax
 4463     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4464 
 4465     // jne    e <normal>
 4466     __ jccb(Assembler::notEqual, normal);
 4467 
 4468     // xor    %edx,%edx
 4469     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4470 
 4471     // cmp    $0xffffffffffffffff,$div
 4472     __ cmpl($div$$Register, -1);
 4473 
 4474     // je     11 <done>
 4475     __ jccb(Assembler::equal, done);
 4476 
 4477     // <normal>
 4478     // cltd
 4479     __ bind(normal);
 4480     __ cdql();
 4481 
 4482     // idivl
 4483     // <done>
 4484     __ idivl($div$$Register);
 4485     __ bind(done);
 4486   %}
 4487 
 4488   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4489   %{
 4490     // Full implementation of Java ldiv and lrem; checks for
 4491     // special case as described in JVM spec., p.243 & p.271.
 4492     //
 4493     //         normal case                           special case
 4494     //
 4495     // input : rax: dividend                         min_long
 4496     //         reg: divisor                          -1
 4497     //
 4498     // output: rax: quotient  (= rax idiv reg)       min_long
 4499     //         rdx: remainder (= rax irem reg)       0
 4500     //
 4501     //  Code sequence:
 4502     //
 4503     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4504     //    7:   00 00 80
 4505     //    a:   48 39 d0                cmp    %rdx,%rax
 4506     //    d:   75 08                   jne    17 <normal>
 4507     //    f:   33 d2                   xor    %edx,%edx
 4508     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4509     //   15:   74 05                   je     1c <done>
 4510     // 0000000000000017 <normal>:
 4511     //   17:   48 99                   cqto
 4512     //   19:   48 f7 f9                idiv   $div
 4513     // 000000000000001c <done>:
 4514     Label normal;
 4515     Label done;
 4516 
 4517     // mov    $0x8000000000000000,%rdx
 4518     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4519 
 4520     // cmp    %rdx,%rax
 4521     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4522 
 4523     // jne    17 <normal>
 4524     __ jccb(Assembler::notEqual, normal);
 4525 
 4526     // xor    %edx,%edx
 4527     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4528 
 4529     // cmp    $0xffffffffffffffff,$div
 4530     __ cmpq($div$$Register, -1);
 4531 
 4532     // je     1c <done>
 4533     __ jccb(Assembler::equal, done);
 4534 
 4535     // <normal>
 4536     // cqto
 4537     __ bind(normal);
 4538     __ cdqq();
 4539 
 4540     // idivq
 4541     // <done>
 4542     __ idivq($div$$Register);
 4543     __ bind(done);
 4544   %}
 4545 
 4546   enc_class clear_avx %{
 4547     DEBUG_ONLY(int off0 = __ offset());
 4548     if (generate_vzeroupper(Compile::current())) {
 4550       // Clear upper bits of YMM registers when the current compiled code uses
 4551       // wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
 4552       __ vzeroupper();
 4553     }
 4554     DEBUG_ONLY(int off1 = __ offset());
 4555     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4556   %}
 4557 
 4558   enc_class Java_To_Runtime(method meth) %{
 4559     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4560     __ call(r10);
 4561     __ post_call_nop();
 4562   %}
 4563 
 4564   enc_class Java_Static_Call(method meth)
 4565   %{
 4566     // JAVA STATIC CALL
 4567     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4568     // determine who we intended to call.
 4569     if (!_method) {
 4570       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4571     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4572       // The NOP here is purely to ensure that eliding a call to
 4573       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4574       __ addr_nop_5();
 4575       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4576     } else {
 4577       int method_index = resolved_method_index(masm);
 4578       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4579                                                   : static_call_Relocation::spec(method_index);
 4580       address mark = __ pc();
 4581       int call_offset = __ offset();
 4582       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4583       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4584         // Calls of the same statically bound method can share
 4585         // a stub to the interpreter.
 4586         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4587       } else {
 4588         // Emit stubs for static call.
 4589         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4590         __ clear_inst_mark();
 4591         if (stub == nullptr) {
 4592           ciEnv::current()->record_failure("CodeCache is full");
 4593           return;
 4594         }
 4595       }
 4596     }
 4597     __ post_call_nop();
 4598   %}
 4599 
 4600   enc_class Java_Dynamic_Call(method meth) %{
 4601     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4602     __ post_call_nop();
 4603   %}
 4604 
 4605   enc_class call_epilog %{
 4606     if (VerifyStackAtCalls) {
 4607       // Check that stack depth is unchanged: find majik cookie on stack
 4608       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4609       Label L;
 4610       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4611       __ jccb(Assembler::equal, L);
 4612       // Die if stack mismatch
 4613       __ int3();
 4614       __ bind(L);
 4615     }
 4616     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4617       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4618       // Search for the corresponding projection, get the register, and emit code that initializes it.
 4619       uint con = (tf()->range_cc()->cnt() - 1);
 4620       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4621         ProjNode* proj = fast_out(i)->as_Proj();
 4622         if (proj->_con == con) {
 4623           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4624           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4625           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4626           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4627           __ testq(rax, rax);
 4628           __ setb(Assembler::notZero, toReg);
 4629           __ movzbl(toReg, toReg);
 4630           if (reg->is_stack()) {
 4631             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4632             __ movq(Address(rsp, st_off), toReg);
 4633           }
 4634           break;
 4635         }
 4636       }
 4637       if (return_value_is_used()) {
 4638         // An inline type is returned as fields in multiple registers.
 4639         // Rax either contains an oop if the inline type is buffered or a pointer
 4640         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4641         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4642         // rax &= (rax & 1) - 1
 4643         __ movptr(rscratch1, rax);
 4644         __ andptr(rscratch1, 0x1);
 4645         __ subptr(rscratch1, 0x1);
 4646         __ andptr(rax, rscratch1);
 4647       }
 4648     }
 4649   %}
 4650 
 4651 %}
 4652 
 4653 //----------FRAME--------------------------------------------------------------
 4654 // Definition of frame structure and management information.
 4655 //
 4656 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4657 //                             |   (to get allocators register number
 4658 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4659 //  r   CALLER     |        |
 4660 //  o     |        +--------+      pad to even-align allocators stack-slot
 4661 //  w     V        |  pad0  |        numbers; owned by CALLER
 4662 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4663 //  h     ^        |   in   |  5
 4664 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4665 //  |     |        |        |  3
 4666 //  |     |        +--------+
 4667 //  V     |        | old out|      Empty on Intel, window on Sparc
 4668 //        |    old |preserve|      Must be even aligned.
 4669 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4670 //        |        |   in   |  3   area for Intel ret address
 4671 //     Owned by    |preserve|      Empty on Sparc.
 4672 //       SELF      +--------+
 4673 //        |        |  pad2  |  2   pad to align old SP
 4674 //        |        +--------+  1
 4675 //        |        | locks  |  0
 4676 //        |        +--------+----> OptoReg::stack0(), even aligned
 4677 //        |        |  pad1  | 11   pad to align new SP
 4678 //        |        +--------+
 4679 //        |        |        | 10
 4680 //        |        | spills |  9   spills
 4681 //        V        |        |  8   (pad0 slot for callee)
 4682 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4683 //        ^        |  out   |  7
 4684 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4685 //     Owned by    +--------+
 4686 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4687 //        |    new |preserve|      Must be even-aligned.
 4688 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4689 //        |        |        |
 4690 //
 4691 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4692 //         known from SELF's arguments and the Java calling convention.
 4693 //         Region 6-7 is determined per call site.
 4694 // Note 2: If the calling convention leaves holes in the incoming argument
 4695 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4696 //         are owned by the CALLEE.  Holes should not be necessary in the
 4697 //         incoming area, as the Java calling convention is completely under
 4698 //         the control of the AD file.  Doubles can be sorted and packed to
 4699 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4700 //         varargs C calling conventions.
 4701 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4702 //         even aligned with pad0 as needed.
 4703 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4704 //         region 6-11 is even aligned; it may be padded out more so that
 4705 //         the region from SP to FP meets the minimum stack alignment.
 4706 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4707 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4708 //         SP meets the minimum alignment.
 4709 
 4710 frame
 4711 %{
 4712   // These three registers define part of the calling convention
 4713   // between compiled code and the interpreter.
 4714   inline_cache_reg(RAX);                // Inline Cache Register
 4715 
 4716   // Optional: name the operand used by cisc-spilling to access
 4717   // [stack_pointer + offset]
 4718   cisc_spilling_operand_name(indOffset32);
 4719 
 4720   // Number of stack slots consumed by locking an object
 4721   sync_stack_slots(2);
 4722 
 4723   // Compiled code's Frame Pointer
 4724   frame_pointer(RSP);
 4725 
 4726   // Interpreter stores its frame pointer in a register which is
 4727   // stored to the stack by I2CAdaptors.
 4728   // I2CAdaptors convert from interpreted java to compiled java.
 4729   interpreter_frame_pointer(RBP);
 4730 
 4731   // Stack alignment requirement
 4732   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4733 
 4734   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4735   // for calls to C.  Supports the var-args backing area for register parms.
 4736   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4737 
 4738   // The after-PROLOG location of the return address.  Location of
 4739   // return address specifies a type (REG or STACK) and a number
 4740   // representing the register number (i.e. - use a register name) or
 4741   // stack slot.
 4742   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4743   // Otherwise, it is above the locks and verification slot and alignment word.
 4744   return_addr(STACK - 2 +
 4745               align_up((Compile::current()->in_preserve_stack_slots() +
 4746                         Compile::current()->fixed_slots()),
 4747                        stack_alignment_in_slots()));
 4748 
 4749   // Location of compiled Java return values.  Same as C for now.
 4750   return_value
 4751   %{
 4752     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4753            "only return normal values");
 4754 
 4755     static const int lo[Op_RegL + 1] = {
 4756       0,
 4757       0,
 4758       RAX_num,  // Op_RegN
 4759       RAX_num,  // Op_RegI
 4760       RAX_num,  // Op_RegP
 4761       XMM0_num, // Op_RegF
 4762       XMM0_num, // Op_RegD
 4763       RAX_num   // Op_RegL
 4764     };
 4765     static const int hi[Op_RegL + 1] = {
 4766       0,
 4767       0,
 4768       OptoReg::Bad, // Op_RegN
 4769       OptoReg::Bad, // Op_RegI
 4770       RAX_H_num,    // Op_RegP
 4771       OptoReg::Bad, // Op_RegF
 4772       XMM0b_num,    // Op_RegD
 4773       RAX_H_num     // Op_RegL
 4774     };
 4775     // Excluded flags and vector registers.
 4776     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4777     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4778   %}
 4779 %}
 4780 
 4781 //----------ATTRIBUTES---------------------------------------------------------
 4782 //----------Operand Attributes-------------------------------------------------
 4783 op_attrib op_cost(0);        // Required cost attribute
 4784 
 4785 //----------Instruction Attributes---------------------------------------------
 4786 ins_attrib ins_cost(100);       // Required cost attribute
 4787 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4788 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4789                                 // a non-matching short branch variant
 4790                                 // of some long branch?
 4791 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4792                                 // be a power of 2) specifies the
 4793                                 // alignment that some part of the
 4794                                 // instruction (not necessarily the
 4795                                 // start) requires.  If > 1, a
 4796                                 // compute_padding() function must be
 4797                                 // provided for the instruction
 4798 
 4799 // Whether this node is expanded during code emission into a sequence of
 4800 // instructions and the first instruction can perform an implicit null check.
 4801 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4802 
 4803 //----------OPERANDS-----------------------------------------------------------
 4804 // Operand definitions must precede instruction definitions for correct parsing
 4805 // in the ADLC because operands constitute user defined types which are used in
 4806 // instruction definitions.
 4807 
 4808 //----------Simple Operands----------------------------------------------------
 4809 // Immediate Operands
 4810 // Integer Immediate
 4811 operand immI()
 4812 %{
 4813   match(ConI);
 4814 
 4815   op_cost(10);
 4816   format %{ %}
 4817   interface(CONST_INTER);
 4818 %}
 4819 
 4820 // Constant for test vs zero
 4821 operand immI_0()
 4822 %{
 4823   predicate(n->get_int() == 0);
 4824   match(ConI);
 4825 
 4826   op_cost(0);
 4827   format %{ %}
 4828   interface(CONST_INTER);
 4829 %}
 4830 
 4831 // Constant for increment
 4832 operand immI_1()
 4833 %{
 4834   predicate(n->get_int() == 1);
 4835   match(ConI);
 4836 
 4837   op_cost(0);
 4838   format %{ %}
 4839   interface(CONST_INTER);
 4840 %}
 4841 
 4842 // Constant for decrement
 4843 operand immI_M1()
 4844 %{
 4845   predicate(n->get_int() == -1);
 4846   match(ConI);
 4847 
 4848   op_cost(0);
 4849   format %{ %}
 4850   interface(CONST_INTER);
 4851 %}
 4852 
 4853 operand immI_2()
 4854 %{
 4855   predicate(n->get_int() == 2);
 4856   match(ConI);
 4857 
 4858   op_cost(0);
 4859   format %{ %}
 4860   interface(CONST_INTER);
 4861 %}
 4862 
 4863 operand immI_4()
 4864 %{
 4865   predicate(n->get_int() == 4);
 4866   match(ConI);
 4867 
 4868   op_cost(0);
 4869   format %{ %}
 4870   interface(CONST_INTER);
 4871 %}
 4872 
 4873 operand immI_8()
 4874 %{
 4875   predicate(n->get_int() == 8);
 4876   match(ConI);
 4877 
 4878   op_cost(0);
 4879   format %{ %}
 4880   interface(CONST_INTER);
 4881 %}
 4882 
 4883 // Valid scale values for addressing modes
 4884 operand immI2()
 4885 %{
 4886   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4887   match(ConI);
 4888 
 4889   format %{ %}
 4890   interface(CONST_INTER);
 4891 %}
 4892 
 4893 operand immU7()
 4894 %{
 4895   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4896   match(ConI);
 4897 
 4898   op_cost(5);
 4899   format %{ %}
 4900   interface(CONST_INTER);
 4901 %}
 4902 
 4903 operand immI8()
 4904 %{
 4905   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4906   match(ConI);
 4907 
 4908   op_cost(5);
 4909   format %{ %}
 4910   interface(CONST_INTER);
 4911 %}
 4912 
 4913 operand immU8()
 4914 %{
 4915   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4916   match(ConI);
 4917 
 4918   op_cost(5);
 4919   format %{ %}
 4920   interface(CONST_INTER);
 4921 %}
 4922 
 4923 operand immI16()
 4924 %{
 4925   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4926   match(ConI);
 4927 
 4928   op_cost(10);
 4929   format %{ %}
 4930   interface(CONST_INTER);
 4931 %}
 4932 
 4933 // Int Immediate non-negative
 4934 operand immU31()
 4935 %{
 4936   predicate(n->get_int() >= 0);
 4937   match(ConI);
 4938 
 4939   op_cost(0);
 4940   format %{ %}
 4941   interface(CONST_INTER);
 4942 %}
 4943 
 4944 // Pointer Immediate
 4945 operand immP()
 4946 %{
 4947   match(ConP);
 4948 
 4949   op_cost(10);
 4950   format %{ %}
 4951   interface(CONST_INTER);
 4952 %}
 4953 
 4954 // Null Pointer Immediate
 4955 operand immP0()
 4956 %{
 4957   predicate(n->get_ptr() == 0);
 4958   match(ConP);
 4959 
 4960   op_cost(5);
 4961   format %{ %}
 4962   interface(CONST_INTER);
 4963 %}
 4964 
 4965 // Pointer Immediate
 4966 operand immN() %{
 4967   match(ConN);
 4968 
 4969   op_cost(10);
 4970   format %{ %}
 4971   interface(CONST_INTER);
 4972 %}
 4973 
 4974 operand immNKlass() %{
 4975   match(ConNKlass);
 4976 
 4977   op_cost(10);
 4978   format %{ %}
 4979   interface(CONST_INTER);
 4980 %}
 4981 
 4982 // Null Pointer Immediate
 4983 operand immN0() %{
 4984   predicate(n->get_narrowcon() == 0);
 4985   match(ConN);
 4986 
 4987   op_cost(5);
 4988   format %{ %}
 4989   interface(CONST_INTER);
 4990 %}
 4991 
 4992 operand immP31()
 4993 %{
 4994   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4995             && (n->get_ptr() >> 31) == 0);
 4996   match(ConP);
 4997 
 4998   op_cost(5);
 4999   format %{ %}
 5000   interface(CONST_INTER);
 5001 %}
 5002 
 5003 
 5004 // Long Immediate
 5005 operand immL()
 5006 %{
 5007   match(ConL);
 5008 
 5009   op_cost(20);
 5010   format %{ %}
 5011   interface(CONST_INTER);
 5012 %}
 5013 
 5014 // Long Immediate 8-bit
 5015 operand immL8()
 5016 %{
 5017   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 5018   match(ConL);
 5019 
 5020   op_cost(5);
 5021   format %{ %}
 5022   interface(CONST_INTER);
 5023 %}
 5024 
 5025 // Long Immediate 32-bit unsigned
 5026 operand immUL32()
 5027 %{
 5028   predicate(n->get_long() == (unsigned int) (n->get_long()));
 5029   match(ConL);
 5030 
 5031   op_cost(10);
 5032   format %{ %}
 5033   interface(CONST_INTER);
 5034 %}
 5035 
 5036 // Long Immediate 32-bit signed
 5037 operand immL32()
 5038 %{
 5039   predicate(n->get_long() == (int) (n->get_long()));
 5040   match(ConL);
 5041 
 5042   op_cost(15);
 5043   format %{ %}
 5044   interface(CONST_INTER);
 5045 %}
 5046 
 5047 operand immL_Pow2()
 5048 %{
 5049   predicate(is_power_of_2((julong)n->get_long()));
 5050   match(ConL);
 5051 
 5052   op_cost(15);
 5053   format %{ %}
 5054   interface(CONST_INTER);
 5055 %}
 5056 
 5057 operand immL_NotPow2()
 5058 %{
 5059   predicate(is_power_of_2((julong)~n->get_long()));
 5060   match(ConL);
 5061 
 5062   op_cost(15);
 5063   format %{ %}
 5064   interface(CONST_INTER);
 5065 %}
 5066 
 5067 // Long Immediate zero
 5068 operand immL0()
 5069 %{
 5070   predicate(n->get_long() == 0L);
 5071   match(ConL);
 5072 
 5073   op_cost(10);
 5074   format %{ %}
 5075   interface(CONST_INTER);
 5076 %}
 5077 
 5078 // Constant for increment
 5079 operand immL1()
 5080 %{
 5081   predicate(n->get_long() == 1);
 5082   match(ConL);
 5083 
 5084   format %{ %}
 5085   interface(CONST_INTER);
 5086 %}
 5087 
 5088 // Constant for decrement
 5089 operand immL_M1()
 5090 %{
 5091   predicate(n->get_long() == -1);
 5092   match(ConL);
 5093 
 5094   format %{ %}
 5095   interface(CONST_INTER);
 5096 %}
 5097 
 5098 // Long Immediate: low 32-bit mask
 5099 operand immL_32bits()
 5100 %{
 5101   predicate(n->get_long() == 0xFFFFFFFFL);
 5102   match(ConL);
 5103   op_cost(20);
 5104 
 5105   format %{ %}
 5106   interface(CONST_INTER);
 5107 %}
 5108 
 5109 // Int Immediate: 2^n-1, positive
 5110 operand immI_Pow2M1()
 5111 %{
 5112   predicate((n->get_int() > 0)
 5113             && is_power_of_2((juint)n->get_int() + 1));
 5114   match(ConI);
 5115 
 5116   op_cost(20);
 5117   format %{ %}
 5118   interface(CONST_INTER);
 5119 %}
 5120 
 5121 // Float Immediate zero
 5122 operand immF0()
 5123 %{
 5124   predicate(jint_cast(n->getf()) == 0);
 5125   match(ConF);
 5126 
 5127   op_cost(5);
 5128   format %{ %}
 5129   interface(CONST_INTER);
 5130 %}
 5131 
 5132 // Float Immediate
 5133 operand immF()
 5134 %{
 5135   match(ConF);
 5136 
 5137   op_cost(15);
 5138   format %{ %}
 5139   interface(CONST_INTER);
 5140 %}
 5141 
 5142 // Half Float Immediate
 5143 operand immH()
 5144 %{
 5145   match(ConH);
 5146 
 5147   op_cost(15);
 5148   format %{ %}
 5149   interface(CONST_INTER);
 5150 %}
 5151 
 5152 // Double Immediate zero
 5153 operand immD0()
 5154 %{
 5155   predicate(jlong_cast(n->getd()) == 0);
 5156   match(ConD);
 5157 
 5158   op_cost(5);
 5159   format %{ %}
 5160   interface(CONST_INTER);
 5161 %}
 5162 
 5163 // Double Immediate
 5164 operand immD()
 5165 %{
 5166   match(ConD);
 5167 
 5168   op_cost(15);
 5169   format %{ %}
 5170   interface(CONST_INTER);
 5171 %}
 5172 
 5173 // Immediates for special shifts (sign extend)
 5174 
 5175 // Constants for sign-extension shift amounts
 5176 operand immI_16()
 5177 %{
 5178   predicate(n->get_int() == 16);
 5179   match(ConI);
 5180 
 5181   format %{ %}
 5182   interface(CONST_INTER);
 5183 %}
 5184 
 5185 operand immI_24()
 5186 %{
 5187   predicate(n->get_int() == 24);
 5188   match(ConI);
 5189 
 5190   format %{ %}
 5191   interface(CONST_INTER);
 5192 %}
 5193 
 5194 // Constant for byte-wide masking
 5195 operand immI_255()
 5196 %{
 5197   predicate(n->get_int() == 255);
 5198   match(ConI);
 5199 
 5200   format %{ %}
 5201   interface(CONST_INTER);
 5202 %}
 5203 
 5204 // Constant for short-wide masking
 5205 operand immI_65535()
 5206 %{
 5207   predicate(n->get_int() == 65535);
 5208   match(ConI);
 5209 
 5210   format %{ %}
 5211   interface(CONST_INTER);
 5212 %}
 5213 
 5214 // Constant for byte-wide masking
 5215 operand immL_255()
 5216 %{
 5217   predicate(n->get_long() == 255);
 5218   match(ConL);
 5219 
 5220   format %{ %}
 5221   interface(CONST_INTER);
 5222 %}
 5223 
 5224 // Constant for short-wide masking
 5225 operand immL_65535()
 5226 %{
 5227   predicate(n->get_long() == 65535);
 5228   match(ConL);
 5229 
 5230   format %{ %}
 5231   interface(CONST_INTER);
 5232 %}
 5233 
 5234 operand kReg()
 5235 %{
 5236   constraint(ALLOC_IN_RC(vectmask_reg));
 5237   match(RegVectMask);
 5238   format %{%}
 5239   interface(REG_INTER);
 5240 %}
 5241 
 5242 // Register Operands
 5243 // Integer Register
 5244 operand rRegI()
 5245 %{
 5246   constraint(ALLOC_IN_RC(int_reg));
 5247   match(RegI);
 5248 
 5249   match(rax_RegI);
 5250   match(rbx_RegI);
 5251   match(rcx_RegI);
 5252   match(rdx_RegI);
 5253   match(rdi_RegI);
 5254 
 5255   format %{ %}
 5256   interface(REG_INTER);
 5257 %}
 5258 
 5259 // Special Registers
 5260 operand rax_RegI()
 5261 %{
 5262   constraint(ALLOC_IN_RC(int_rax_reg));
 5263   match(RegI);
 5264   match(rRegI);
 5265 
 5266   format %{ "RAX" %}
 5267   interface(REG_INTER);
 5268 %}
 5269 
 5270 // Special Registers
 5271 operand rbx_RegI()
 5272 %{
 5273   constraint(ALLOC_IN_RC(int_rbx_reg));
 5274   match(RegI);
 5275   match(rRegI);
 5276 
 5277   format %{ "RBX" %}
 5278   interface(REG_INTER);
 5279 %}
 5280 
 5281 operand rcx_RegI()
 5282 %{
 5283   constraint(ALLOC_IN_RC(int_rcx_reg));
 5284   match(RegI);
 5285   match(rRegI);
 5286 
 5287   format %{ "RCX" %}
 5288   interface(REG_INTER);
 5289 %}
 5290 
 5291 operand rdx_RegI()
 5292 %{
 5293   constraint(ALLOC_IN_RC(int_rdx_reg));
 5294   match(RegI);
 5295   match(rRegI);
 5296 
 5297   format %{ "RDX" %}
 5298   interface(REG_INTER);
 5299 %}
 5300 
 5301 operand rdi_RegI()
 5302 %{
 5303   constraint(ALLOC_IN_RC(int_rdi_reg));
 5304   match(RegI);
 5305   match(rRegI);
 5306 
 5307   format %{ "RDI" %}
 5308   interface(REG_INTER);
 5309 %}
 5310 
 5311 operand no_rax_rdx_RegI()
 5312 %{
 5313   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5314   match(RegI);
 5315   match(rbx_RegI);
 5316   match(rcx_RegI);
 5317   match(rdi_RegI);
 5318 
 5319   format %{ %}
 5320   interface(REG_INTER);
 5321 %}
 5322 
 5323 operand no_rbp_r13_RegI()
 5324 %{
 5325   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5326   match(RegI);
 5327   match(rRegI);
 5328   match(rax_RegI);
 5329   match(rbx_RegI);
 5330   match(rcx_RegI);
 5331   match(rdx_RegI);
 5332   match(rdi_RegI);
 5333 
 5334   format %{ %}
 5335   interface(REG_INTER);
 5336 %}
 5337 
 5338 // Pointer Register
 5339 operand any_RegP()
 5340 %{
 5341   constraint(ALLOC_IN_RC(any_reg));
 5342   match(RegP);
 5343   match(rax_RegP);
 5344   match(rbx_RegP);
 5345   match(rdi_RegP);
 5346   match(rsi_RegP);
 5347   match(rbp_RegP);
 5348   match(r15_RegP);
 5349   match(rRegP);
 5350 
 5351   format %{ %}
 5352   interface(REG_INTER);
 5353 %}
 5354 
 5355 operand rRegP()
 5356 %{
 5357   constraint(ALLOC_IN_RC(ptr_reg));
 5358   match(RegP);
 5359   match(rax_RegP);
 5360   match(rbx_RegP);
 5361   match(rdi_RegP);
 5362   match(rsi_RegP);
 5363   match(rbp_RegP);  // See Q&A below about
 5364   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5365 
 5366   format %{ %}
 5367   interface(REG_INTER);
 5368 %}
 5369 
 5370 operand rRegN() %{
 5371   constraint(ALLOC_IN_RC(int_reg));
 5372   match(RegN);
 5373 
 5374   format %{ %}
 5375   interface(REG_INTER);
 5376 %}
 5377 
 5378 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5379 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will never be chosen
// by the allocator for that output.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is never chosen as the output of the instruction either.
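//
// For illustration only (a hedged sketch, not a rule defined at this point in
// the file): an instruction that actually wants R15 must name r15_RegP as its
// output operand, e.g.
//
//   instruct tlsLoadP_example(r15_RegP dst) %{
//     match(Set dst (ThreadLocal));
//     ins_cost(0);
//     format %{ "# TLS is in R15" %}
//     ins_encode( /*empty encoding*/ );
//     ins_pipe(ialu_reg_reg);
//   %}
//
// whereas an output declared as rRegP can never be assigned R15, because R15
// is not part of the ptr_reg allocation class.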
 5388 
 5389 // This operand is not allowed to use RBP even if
 5390 // RBP is not used to hold the frame pointer.
 5391 operand no_rbp_RegP()
 5392 %{
 5393   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5394   match(RegP);
 5395   match(rbx_RegP);
 5396   match(rsi_RegP);
 5397   match(rdi_RegP);
 5398 
 5399   format %{ %}
 5400   interface(REG_INTER);
 5401 %}
 5402 
 5403 // Special Registers
 5404 // Return a pointer value
 5405 operand rax_RegP()
 5406 %{
 5407   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5408   match(RegP);
 5409   match(rRegP);
 5410 
 5411   format %{ %}
 5412   interface(REG_INTER);
 5413 %}
 5414 
 5415 // Special Registers
 5416 // Return a compressed pointer value
 5417 operand rax_RegN()
 5418 %{
 5419   constraint(ALLOC_IN_RC(int_rax_reg));
 5420   match(RegN);
 5421   match(rRegN);
 5422 
 5423   format %{ %}
 5424   interface(REG_INTER);
 5425 %}
 5426 
 5427 // Used in AtomicAdd
 5428 operand rbx_RegP()
 5429 %{
 5430   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5431   match(RegP);
 5432   match(rRegP);
 5433 
 5434   format %{ %}
 5435   interface(REG_INTER);
 5436 %}
 5437 
 5438 operand rsi_RegP()
 5439 %{
 5440   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5441   match(RegP);
 5442   match(rRegP);
 5443 
 5444   format %{ %}
 5445   interface(REG_INTER);
 5446 %}
 5447 
 5448 operand rbp_RegP()
 5449 %{
 5450   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5451   match(RegP);
 5452   match(rRegP);
 5453 
 5454   format %{ %}
 5455   interface(REG_INTER);
 5456 %}
 5457 
 5458 // Used in rep stosq
 5459 operand rdi_RegP()
 5460 %{
 5461   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5462   match(RegP);
 5463   match(rRegP);
 5464 
 5465   format %{ %}
 5466   interface(REG_INTER);
 5467 %}
 5468 
 5469 operand r15_RegP()
 5470 %{
 5471   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5472   match(RegP);
 5473   match(rRegP);
 5474 
 5475   format %{ %}
 5476   interface(REG_INTER);
 5477 %}
 5478 
 5479 operand rRegL()
 5480 %{
 5481   constraint(ALLOC_IN_RC(long_reg));
 5482   match(RegL);
 5483   match(rax_RegL);
 5484   match(rdx_RegL);
 5485 
 5486   format %{ %}
 5487   interface(REG_INTER);
 5488 %}
 5489 
 5490 // Special Registers
 5491 operand no_rax_rdx_RegL()
 5492 %{
 5493   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5494   match(RegL);
 5495   match(rRegL);
 5496 
 5497   format %{ %}
 5498   interface(REG_INTER);
 5499 %}
 5500 
 5501 operand rax_RegL()
 5502 %{
 5503   constraint(ALLOC_IN_RC(long_rax_reg));
 5504   match(RegL);
 5505   match(rRegL);
 5506 
 5507   format %{ "RAX" %}
 5508   interface(REG_INTER);
 5509 %}
 5510 
 5511 operand rcx_RegL()
 5512 %{
 5513   constraint(ALLOC_IN_RC(long_rcx_reg));
 5514   match(RegL);
 5515   match(rRegL);
 5516 
 5517   format %{ %}
 5518   interface(REG_INTER);
 5519 %}
 5520 
 5521 operand rdx_RegL()
 5522 %{
 5523   constraint(ALLOC_IN_RC(long_rdx_reg));
 5524   match(RegL);
 5525   match(rRegL);
 5526 
 5527   format %{ %}
 5528   interface(REG_INTER);
 5529 %}
 5530 
 5531 operand r11_RegL()
 5532 %{
 5533   constraint(ALLOC_IN_RC(long_r11_reg));
 5534   match(RegL);
 5535   match(rRegL);
 5536 
 5537   format %{ %}
 5538   interface(REG_INTER);
 5539 %}
 5540 
 5541 operand no_rbp_r13_RegL()
 5542 %{
 5543   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5544   match(RegL);
 5545   match(rRegL);
 5546   match(rax_RegL);
 5547   match(rcx_RegL);
 5548   match(rdx_RegL);
 5549 
 5550   format %{ %}
 5551   interface(REG_INTER);
 5552 %}
 5553 
 5554 // Flags register, used as output of compare instructions
 5555 operand rFlagsReg()
 5556 %{
 5557   constraint(ALLOC_IN_RC(int_flags));
 5558   match(RegFlags);
 5559 
 5560   format %{ "RFLAGS" %}
 5561   interface(REG_INTER);
 5562 %}
 5563 
 5564 // Flags register, used as output of FLOATING POINT compare instructions
 5565 operand rFlagsRegU()
 5566 %{
 5567   constraint(ALLOC_IN_RC(int_flags));
 5568   match(RegFlags);
 5569 
 5570   format %{ "RFLAGS_U" %}
 5571   interface(REG_INTER);
 5572 %}
 5573 
 5574 operand rFlagsRegUCF() %{
 5575   constraint(ALLOC_IN_RC(int_flags));
 5576   match(RegFlags);
 5577   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5578 
 5579   format %{ "RFLAGS_U_CF" %}
 5580   interface(REG_INTER);
 5581 %}
 5582 
 5583 operand rFlagsRegUCFE() %{
 5584   constraint(ALLOC_IN_RC(int_flags));
 5585   match(RegFlags);
 5586   predicate(UseAPX && VM_Version::supports_avx10_2());
 5587 
 5588   format %{ "RFLAGS_U_CFE" %}
 5589   interface(REG_INTER);
 5590 %}
 5591 
 5592 // Float register operands
 5593 operand regF() %{
 5594    constraint(ALLOC_IN_RC(float_reg));
 5595    match(RegF);
 5596 
 5597    format %{ %}
 5598    interface(REG_INTER);
 5599 %}
 5600 
 5601 // Float register operands
 5602 operand legRegF() %{
 5603    constraint(ALLOC_IN_RC(float_reg_legacy));
 5604    match(RegF);
 5605 
 5606    format %{ %}
 5607    interface(REG_INTER);
 5608 %}
 5609 
 5610 // Float register operands
 5611 operand vlRegF() %{
 5612    constraint(ALLOC_IN_RC(float_reg_vl));
 5613    match(RegF);
 5614 
 5615    format %{ %}
 5616    interface(REG_INTER);
 5617 %}
 5618 
 5619 // Double register operands
 5620 operand regD() %{
 5621    constraint(ALLOC_IN_RC(double_reg));
 5622    match(RegD);
 5623 
 5624    format %{ %}
 5625    interface(REG_INTER);
 5626 %}
 5627 
 5628 // Double register operands
 5629 operand legRegD() %{
 5630    constraint(ALLOC_IN_RC(double_reg_legacy));
 5631    match(RegD);
 5632 
 5633    format %{ %}
 5634    interface(REG_INTER);
 5635 %}
 5636 
 5637 // Double register operands
 5638 operand vlRegD() %{
 5639    constraint(ALLOC_IN_RC(double_reg_vl));
 5640    match(RegD);
 5641 
 5642    format %{ %}
 5643    interface(REG_INTER);
 5644 %}
 5645 
 5646 //----------Memory Operands----------------------------------------------------
 5647 // Direct Memory Operand
 5648 // operand direct(immP addr)
 5649 // %{
 5650 //   match(addr);
 5651 
 5652 //   format %{ "[$addr]" %}
 5653 //   interface(MEMORY_INTER) %{
 5654 //     base(0xFFFFFFFF);
 5655 //     index(0x4);
 5656 //     scale(0x0);
 5657 //     disp($addr);
 5658 //   %}
 5659 // %}
 5660 
 5661 // Indirect Memory Operand
 5662 operand indirect(any_RegP reg)
 5663 %{
 5664   constraint(ALLOC_IN_RC(ptr_reg));
 5665   match(reg);
 5666 
 5667   format %{ "[$reg]" %}
 5668   interface(MEMORY_INTER) %{
 5669     base($reg);
 5670     index(0x4);
 5671     scale(0x0);
 5672     disp(0x0);
 5673   %}
 5674 %}
 5675 
 5676 // Indirect Memory Plus Short Offset Operand
 5677 operand indOffset8(any_RegP reg, immL8 off)
 5678 %{
 5679   constraint(ALLOC_IN_RC(ptr_reg));
 5680   match(AddP reg off);
 5681 
 5682   format %{ "[$reg + $off (8-bit)]" %}
 5683   interface(MEMORY_INTER) %{
 5684     base($reg);
 5685     index(0x4);
 5686     scale(0x0);
 5687     disp($off);
 5688   %}
 5689 %}
 5690 
 5691 // Indirect Memory Plus Long Offset Operand
 5692 operand indOffset32(any_RegP reg, immL32 off)
 5693 %{
 5694   constraint(ALLOC_IN_RC(ptr_reg));
 5695   match(AddP reg off);
 5696 
 5697   format %{ "[$reg + $off (32-bit)]" %}
 5698   interface(MEMORY_INTER) %{
 5699     base($reg);
 5700     index(0x4);
 5701     scale(0x0);
 5702     disp($off);
 5703   %}
 5704 %}
 5705 
 5706 // Indirect Memory Plus Index Register Plus Offset Operand
 5707 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5708 %{
 5709   constraint(ALLOC_IN_RC(ptr_reg));
 5710   match(AddP (AddP reg lreg) off);
 5711 
 5712   op_cost(10);
 5713   format %{"[$reg + $off + $lreg]" %}
 5714   interface(MEMORY_INTER) %{
 5715     base($reg);
 5716     index($lreg);
 5717     scale(0x0);
 5718     disp($off);
 5719   %}
 5720 %}
 5721 
// Indirect Memory Plus Index Register Operand
 5723 operand indIndex(any_RegP reg, rRegL lreg)
 5724 %{
 5725   constraint(ALLOC_IN_RC(ptr_reg));
 5726   match(AddP reg lreg);
 5727 
 5728   op_cost(10);
 5729   format %{"[$reg + $lreg]" %}
 5730   interface(MEMORY_INTER) %{
 5731     base($reg);
 5732     index($lreg);
 5733     scale(0x0);
 5734     disp(0x0);
 5735   %}
 5736 %}
 5737 
 5738 // Indirect Memory Times Scale Plus Index Register
 5739 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5740 %{
 5741   constraint(ALLOC_IN_RC(ptr_reg));
 5742   match(AddP reg (LShiftL lreg scale));
 5743 
 5744   op_cost(10);
 5745   format %{"[$reg + $lreg << $scale]" %}
 5746   interface(MEMORY_INTER) %{
 5747     base($reg);
 5748     index($lreg);
 5749     scale($scale);
 5750     disp(0x0);
 5751   %}
 5752 %}
 5753 
 5754 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5755 %{
 5756   constraint(ALLOC_IN_RC(ptr_reg));
 5757   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5758   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5759 
 5760   op_cost(10);
 5761   format %{"[$reg + pos $idx << $scale]" %}
 5762   interface(MEMORY_INTER) %{
 5763     base($reg);
 5764     index($idx);
 5765     scale($scale);
 5766     disp(0x0);
 5767   %}
 5768 %}
 5769 
 5770 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5771 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5772 %{
 5773   constraint(ALLOC_IN_RC(ptr_reg));
 5774   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5775 
 5776   op_cost(10);
 5777   format %{"[$reg + $off + $lreg << $scale]" %}
 5778   interface(MEMORY_INTER) %{
 5779     base($reg);
 5780     index($lreg);
 5781     scale($scale);
 5782     disp($off);
 5783   %}
 5784 %}
 5785 
 5786 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5787 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5788 %{
 5789   constraint(ALLOC_IN_RC(ptr_reg));
 5790   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5791   match(AddP (AddP reg (ConvI2L idx)) off);
 5792 
 5793   op_cost(10);
 5794   format %{"[$reg + $off + $idx]" %}
 5795   interface(MEMORY_INTER) %{
 5796     base($reg);
 5797     index($idx);
 5798     scale(0x0);
 5799     disp($off);
 5800   %}
 5801 %}
 5802 
 5803 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5804 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5805 %{
 5806   constraint(ALLOC_IN_RC(ptr_reg));
 5807   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5808   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5809 
 5810   op_cost(10);
 5811   format %{"[$reg + $off + $idx << $scale]" %}
 5812   interface(MEMORY_INTER) %{
 5813     base($reg);
 5814     index($idx);
 5815     scale($scale);
 5816     disp($off);
 5817   %}
 5818 %}
 5819 
 5820 // Indirect Narrow Oop Operand
 5821 operand indCompressedOop(rRegN reg) %{
 5822   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5823   constraint(ALLOC_IN_RC(ptr_reg));
 5824   match(DecodeN reg);
 5825 
 5826   op_cost(10);
 5827   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5828   interface(MEMORY_INTER) %{
 5829     base(0xc); // R12
 5830     index($reg);
 5831     scale(0x3);
 5832     disp(0x0);
 5833   %}
 5834 %}
 5835 
 5836 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a base
// register, so we can't free r12 even with CompressedOops::base() == nullptr.
 5839 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5840   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5841   constraint(ALLOC_IN_RC(ptr_reg));
 5842   match(AddP (DecodeN reg) off);
 5843 
 5844   op_cost(10);
 5845   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5846   interface(MEMORY_INTER) %{
 5847     base(0xc); // R12
 5848     index($reg);
 5849     scale(0x3);
 5850     disp($off);
 5851   %}
 5852 %}
 5853 
 5854 // Indirect Memory Operand
 5855 operand indirectNarrow(rRegN reg)
 5856 %{
 5857   predicate(CompressedOops::shift() == 0);
 5858   constraint(ALLOC_IN_RC(ptr_reg));
 5859   match(DecodeN reg);
 5860 
 5861   format %{ "[$reg]" %}
 5862   interface(MEMORY_INTER) %{
 5863     base($reg);
 5864     index(0x4);
 5865     scale(0x0);
 5866     disp(0x0);
 5867   %}
 5868 %}
 5869 
 5870 // Indirect Memory Plus Short Offset Operand
 5871 operand indOffset8Narrow(rRegN reg, immL8 off)
 5872 %{
 5873   predicate(CompressedOops::shift() == 0);
 5874   constraint(ALLOC_IN_RC(ptr_reg));
 5875   match(AddP (DecodeN reg) off);
 5876 
 5877   format %{ "[$reg + $off (8-bit)]" %}
 5878   interface(MEMORY_INTER) %{
 5879     base($reg);
 5880     index(0x4);
 5881     scale(0x0);
 5882     disp($off);
 5883   %}
 5884 %}
 5885 
 5886 // Indirect Memory Plus Long Offset Operand
 5887 operand indOffset32Narrow(rRegN reg, immL32 off)
 5888 %{
 5889   predicate(CompressedOops::shift() == 0);
 5890   constraint(ALLOC_IN_RC(ptr_reg));
 5891   match(AddP (DecodeN reg) off);
 5892 
 5893   format %{ "[$reg + $off (32-bit)]" %}
 5894   interface(MEMORY_INTER) %{
 5895     base($reg);
 5896     index(0x4);
 5897     scale(0x0);
 5898     disp($off);
 5899   %}
 5900 %}
 5901 
 5902 // Indirect Memory Plus Index Register Plus Offset Operand
 5903 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5904 %{
 5905   predicate(CompressedOops::shift() == 0);
 5906   constraint(ALLOC_IN_RC(ptr_reg));
 5907   match(AddP (AddP (DecodeN reg) lreg) off);
 5908 
 5909   op_cost(10);
 5910   format %{"[$reg + $off + $lreg]" %}
 5911   interface(MEMORY_INTER) %{
 5912     base($reg);
 5913     index($lreg);
 5914     scale(0x0);
 5915     disp($off);
 5916   %}
 5917 %}
 5918 
// Indirect Memory Plus Index Register Operand
 5920 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5921 %{
 5922   predicate(CompressedOops::shift() == 0);
 5923   constraint(ALLOC_IN_RC(ptr_reg));
 5924   match(AddP (DecodeN reg) lreg);
 5925 
 5926   op_cost(10);
 5927   format %{"[$reg + $lreg]" %}
 5928   interface(MEMORY_INTER) %{
 5929     base($reg);
 5930     index($lreg);
 5931     scale(0x0);
 5932     disp(0x0);
 5933   %}
 5934 %}
 5935 
 5936 // Indirect Memory Times Scale Plus Index Register
 5937 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5938 %{
 5939   predicate(CompressedOops::shift() == 0);
 5940   constraint(ALLOC_IN_RC(ptr_reg));
 5941   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5942 
 5943   op_cost(10);
 5944   format %{"[$reg + $lreg << $scale]" %}
 5945   interface(MEMORY_INTER) %{
 5946     base($reg);
 5947     index($lreg);
 5948     scale($scale);
 5949     disp(0x0);
 5950   %}
 5951 %}
 5952 
 5953 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5954 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5955 %{
 5956   predicate(CompressedOops::shift() == 0);
 5957   constraint(ALLOC_IN_RC(ptr_reg));
 5958   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5959 
 5960   op_cost(10);
 5961   format %{"[$reg + $off + $lreg << $scale]" %}
 5962   interface(MEMORY_INTER) %{
 5963     base($reg);
 5964     index($lreg);
 5965     scale($scale);
 5966     disp($off);
 5967   %}
 5968 %}
 5969 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5971 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5972 %{
 5973   constraint(ALLOC_IN_RC(ptr_reg));
 5974   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5975   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5976 
 5977   op_cost(10);
 5978   format %{"[$reg + $off + $idx]" %}
 5979   interface(MEMORY_INTER) %{
 5980     base($reg);
 5981     index($idx);
 5982     scale(0x0);
 5983     disp($off);
 5984   %}
 5985 %}
 5986 
 5987 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5988 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5989 %{
 5990   constraint(ALLOC_IN_RC(ptr_reg));
 5991   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5992   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5993 
 5994   op_cost(10);
 5995   format %{"[$reg + $off + $idx << $scale]" %}
 5996   interface(MEMORY_INTER) %{
 5997     base($reg);
 5998     index($idx);
 5999     scale($scale);
 6000     disp($off);
 6001   %}
 6002 %}
 6003 
 6004 //----------Special Memory Operands--------------------------------------------
 6005 // Stack Slot Operand - This operand is used for loading and storing temporary
 6006 //                      values on the stack where a match requires a value to
 6007 //                      flow through memory.
 6008 operand stackSlotP(sRegP reg)
 6009 %{
 6010   constraint(ALLOC_IN_RC(stack_slots));
 6011   // No match rule because this operand is only generated in matching
 6012 
 6013   format %{ "[$reg]" %}
 6014   interface(MEMORY_INTER) %{
 6015     base(0x4);   // RSP
 6016     index(0x4);  // No Index
 6017     scale(0x0);  // No Scale
 6018     disp($reg);  // Stack Offset
 6019   %}
 6020 %}
 6021 
 6022 operand stackSlotI(sRegI reg)
 6023 %{
 6024   constraint(ALLOC_IN_RC(stack_slots));
 6025   // No match rule because this operand is only generated in matching
 6026 
 6027   format %{ "[$reg]" %}
 6028   interface(MEMORY_INTER) %{
 6029     base(0x4);   // RSP
 6030     index(0x4);  // No Index
 6031     scale(0x0);  // No Scale
 6032     disp($reg);  // Stack Offset
 6033   %}
 6034 %}
 6035 
 6036 operand stackSlotF(sRegF reg)
 6037 %{
 6038   constraint(ALLOC_IN_RC(stack_slots));
 6039   // No match rule because this operand is only generated in matching
 6040 
 6041   format %{ "[$reg]" %}
 6042   interface(MEMORY_INTER) %{
 6043     base(0x4);   // RSP
 6044     index(0x4);  // No Index
 6045     scale(0x0);  // No Scale
 6046     disp($reg);  // Stack Offset
 6047   %}
 6048 %}
 6049 
 6050 operand stackSlotD(sRegD reg)
 6051 %{
 6052   constraint(ALLOC_IN_RC(stack_slots));
 6053   // No match rule because this operand is only generated in matching
 6054 
 6055   format %{ "[$reg]" %}
 6056   interface(MEMORY_INTER) %{
 6057     base(0x4);   // RSP
 6058     index(0x4);  // No Index
 6059     scale(0x0);  // No Scale
 6060     disp($reg);  // Stack Offset
 6061   %}
 6062 %}
 6063 operand stackSlotL(sRegL reg)
 6064 %{
 6065   constraint(ALLOC_IN_RC(stack_slots));
 6066   // No match rule because this operand is only generated in matching
 6067 
 6068   format %{ "[$reg]" %}
 6069   interface(MEMORY_INTER) %{
 6070     base(0x4);   // RSP
 6071     index(0x4);  // No Index
 6072     scale(0x0);  // No Scale
 6073     disp($reg);  // Stack Offset
 6074   %}
 6075 %}
 6076 
 6077 //----------Conditional Branch Operands----------------------------------------
 6078 // Comparison Op  - This is the operation of the comparison, and is limited to
 6079 //                  the following set of codes:
 6080 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6081 //
 6082 // Other attributes of the comparison, such as unsignedness, are specified
 6083 // by the comparison instruction that sets a condition code flags register.
 6084 // That result is represented by a flags operand whose subtype is appropriate
 6085 // to the unsignedness (etc.) of the comparison.
 6086 //
 6087 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6088 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6089 // by matching a specific subtype of Bool operand below, such as cmpOpU.
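//
// For illustration only (a hedged sketch; the real branch rules appear later
// in this file), an instruction that consumes both the Bool and the flags
// looks roughly like:
//
//   instruct jmpCon_example(cmpOp cop, rFlagsReg cr, label labl) %{
//     match(If cop cr);
//     effect(USE labl);
//     format %{ "j$cop   $labl" %}
//     ins_encode %{
//       __ jcc((Assembler::Condition)($cop$$cmpcode), *($labl$$label));
//     %}
//     ins_pipe(pipe_jcc);
//   %}
//
// where $cop$$cmpcode is exactly the encoding listed in the COND_INTER entries
// below (e.g. 0x4 for "e", 0x5 for "ne").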
 6090 
 6091 // Comparison Code
 6092 operand cmpOp()
 6093 %{
 6094   match(Bool);
 6095 
 6096   format %{ "" %}
 6097   interface(COND_INTER) %{
 6098     equal(0x4, "e");
 6099     not_equal(0x5, "ne");
 6100     less(0xc, "l");
 6101     greater_equal(0xd, "ge");
 6102     less_equal(0xe, "le");
 6103     greater(0xf, "g");
 6104     overflow(0x0, "o");
 6105     no_overflow(0x1, "no");
 6106   %}
 6107 %}
 6108 
 6109 // Comparison Code, unsigned compare.  Used by FP also, with
 6110 // C2 (unordered) turned into GT or LT already.  The other bits
 6111 // C0 and C3 are turned into Carry & Zero flags.
 6112 operand cmpOpU()
 6113 %{
 6114   match(Bool);
 6115 
 6116   format %{ "" %}
 6117   interface(COND_INTER) %{
 6118     equal(0x4, "e");
 6119     not_equal(0x5, "ne");
 6120     less(0x2, "b");
 6121     greater_equal(0x3, "ae");
 6122     less_equal(0x6, "be");
 6123     greater(0x7, "a");
 6124     overflow(0x0, "o");
 6125     no_overflow(0x1, "no");
 6126   %}
 6127 %}
 6128 
 6129 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
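// As a reminder of why this works: ucomiss/ucomisd set (ZF, PF, CF) to
//   unordered (NaN operand) -> (1, 1, 1)
//   equal                   -> (1, 0, 0)
//   greater                 -> (0, 0, 0)
//   less                    -> (0, 0, 1)
// so when both inputs are the same value the outcome is either "equal" or
// "unordered" and ZF is 1 in both cases, which is why a plain je/jne on ZF is
// already correct for eq/ne without the parity fixup used by cmpOpUCF2.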
 6133 operand cmpOpUCF() %{
 6134   match(Bool);
 6135   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6136             (n->as_Bool()->_test._test == BoolTest::lt ||
 6137              n->as_Bool()->_test._test == BoolTest::ge ||
 6138              n->as_Bool()->_test._test == BoolTest::le ||
 6139              n->as_Bool()->_test._test == BoolTest::gt ||
 6140              n->in(1)->in(1) == n->in(1)->in(2)));
 6141   format %{ "" %}
 6142   interface(COND_INTER) %{
 6143     equal(0xb, "np");
 6144     not_equal(0xa, "p");
 6145     less(0x2, "b");
 6146     greater_equal(0x3, "ae");
 6147     less_equal(0x6, "be");
 6148     greater(0x7, "a");
 6149     overflow(0x0, "o");
 6150     no_overflow(0x1, "no");
 6151   %}
 6152 %}
 6153 
 6154 
 6155 // Floating comparisons that can be fixed up with extra conditional jumps
 6156 operand cmpOpUCF2() %{
 6157   match(Bool);
 6158   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6159             (n->as_Bool()->_test._test == BoolTest::ne ||
 6160              n->as_Bool()->_test._test == BoolTest::eq) &&
 6161             n->in(1)->in(1) != n->in(1)->in(2));
 6162   format %{ "" %}
 6163   interface(COND_INTER) %{
 6164     equal(0x4, "e");
 6165     not_equal(0x5, "ne");
 6166     less(0x2, "b");
 6167     greater_equal(0x3, "ae");
 6168     less_equal(0x6, "be");
 6169     greater(0x7, "a");
 6170     overflow(0x0, "o");
 6171     no_overflow(0x1, "no");
 6172   %}
 6173 %}
 6174 
 6175 
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for the G (>) and GE (>=) conditions, while signed
// tests are used for the L (<) and LE (<=) conditions. It's important to
// convert the latter conditions to ones that use unsigned tests before passing
// them into an instruction, because the preceding comparison might be based on
// a three-way comparison (CmpF3 or CmpD3) that also assigns unordered outcomes
// to -1.
 6182 operand cmpOpUCFE()
 6183 %{
 6184   match(Bool);
 6185   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6186             (n->as_Bool()->_test._test == BoolTest::ne ||
 6187              n->as_Bool()->_test._test == BoolTest::eq ||
 6188              n->as_Bool()->_test._test == BoolTest::lt ||
 6189              n->as_Bool()->_test._test == BoolTest::ge ||
 6190              n->as_Bool()->_test._test == BoolTest::le ||
 6191              n->as_Bool()->_test._test == BoolTest::gt));
 6192 
 6193   format %{ "" %}
 6194   interface(COND_INTER) %{
 6195     equal(0x4, "e");
 6196     not_equal(0x5, "ne");
 6197     less(0x2, "b");
 6198     greater_equal(0x3, "ae");
 6199     less_equal(0x6, "be");
 6200     greater(0x7, "a");
 6201     overflow(0x0, "o");
 6202     no_overflow(0x1, "no");
 6203   %}
 6204 %}
 6205 
// Operands for bound floating point register arguments
 6207 operand rxmm0() %{
 6208   constraint(ALLOC_IN_RC(xmm0_reg));
 6209   match(VecX);
 6210   format%{%}
 6211   interface(REG_INTER);
 6212 %}
 6213 
 6214 // Vectors
 6215 
 6216 // Dummy generic vector class. Should be used for all vector operands.
 6217 // Replaced with vec[SDXYZ] during post-selection pass.
 6218 operand vec() %{
 6219   constraint(ALLOC_IN_RC(dynamic));
 6220   match(VecX);
 6221   match(VecY);
 6222   match(VecZ);
 6223   match(VecS);
 6224   match(VecD);
 6225 
 6226   format %{ %}
 6227   interface(REG_INTER);
 6228 %}
 6229 
 6230 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6231 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6232 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6233 // runtime code generation via reg_class_dynamic.
 6234 operand legVec() %{
 6235   constraint(ALLOC_IN_RC(dynamic));
 6236   match(VecX);
 6237   match(VecY);
 6238   match(VecZ);
 6239   match(VecS);
 6240   match(VecD);
 6241 
 6242   format %{ %}
 6243   interface(REG_INTER);
 6244 %}
 6245 
 6246 // Replaces vec during post-selection cleanup. See above.
 6247 operand vecS() %{
 6248   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6249   match(VecS);
 6250 
 6251   format %{ %}
 6252   interface(REG_INTER);
 6253 %}
 6254 
 6255 // Replaces legVec during post-selection cleanup. See above.
 6256 operand legVecS() %{
 6257   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6258   match(VecS);
 6259 
 6260   format %{ %}
 6261   interface(REG_INTER);
 6262 %}
 6263 
 6264 // Replaces vec during post-selection cleanup. See above.
 6265 operand vecD() %{
 6266   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6267   match(VecD);
 6268 
 6269   format %{ %}
 6270   interface(REG_INTER);
 6271 %}
 6272 
 6273 // Replaces legVec during post-selection cleanup. See above.
 6274 operand legVecD() %{
 6275   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6276   match(VecD);
 6277 
 6278   format %{ %}
 6279   interface(REG_INTER);
 6280 %}
 6281 
 6282 // Replaces vec during post-selection cleanup. See above.
 6283 operand vecX() %{
 6284   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6285   match(VecX);
 6286 
 6287   format %{ %}
 6288   interface(REG_INTER);
 6289 %}
 6290 
 6291 // Replaces legVec during post-selection cleanup. See above.
 6292 operand legVecX() %{
 6293   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6294   match(VecX);
 6295 
 6296   format %{ %}
 6297   interface(REG_INTER);
 6298 %}
 6299 
 6300 // Replaces vec during post-selection cleanup. See above.
 6301 operand vecY() %{
 6302   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6303   match(VecY);
 6304 
 6305   format %{ %}
 6306   interface(REG_INTER);
 6307 %}
 6308 
 6309 // Replaces legVec during post-selection cleanup. See above.
 6310 operand legVecY() %{
 6311   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6312   match(VecY);
 6313 
 6314   format %{ %}
 6315   interface(REG_INTER);
 6316 %}
 6317 
 6318 // Replaces vec during post-selection cleanup. See above.
 6319 operand vecZ() %{
 6320   constraint(ALLOC_IN_RC(vectorz_reg));
 6321   match(VecZ);
 6322 
 6323   format %{ %}
 6324   interface(REG_INTER);
 6325 %}
 6326 
 6327 // Replaces legVec during post-selection cleanup. See above.
 6328 operand legVecZ() %{
 6329   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6330   match(VecZ);
 6331 
 6332   format %{ %}
 6333   interface(REG_INTER);
 6334 %}
 6335 
 6336 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6338 // instruction definitions by not requiring the AD writer to specify separate
 6339 // instructions for every form of operand when the instruction accepts
 6340 // multiple operand types with the same basic encoding and format.  The classic
 6341 // case of this is memory operands.
 6342 
 6343 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6344                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6345                indCompressedOop, indCompressedOopOffset,
 6346                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6347                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6348                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
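
// For example, the single rule
//
//   instruct loadB(rRegI dst, memory mem) %{ match(Set dst (LoadB mem)); ... %}
//
// (defined later in this file) is expanded by the matcher into one variant per
// addressing mode listed in the opclass above, so each load/store needs only
// one definition instead of one per operand form.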
 6349 
 6350 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6352 pipeline %{
 6353 
 6354 //----------ATTRIBUTES---------------------------------------------------------
 6355 attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6359   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6360   instruction_fetch_units = 1;       // of 16 bytes
 6361 %}
 6362 
 6363 //----------RESOURCES----------------------------------------------------------
 6364 // Resources are the functional units available to the machine
 6365 
 6366 // Generic P2/P3 pipeline
 6367 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6368 // 3 instructions decoded per cycle.
 6369 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
 6371 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6372            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6373            BR, FPU,
 6374            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6375 
 6376 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6377 // Pipeline Description specifies the stages in the machine's pipeline
 6378 
 6379 // Generic P2/P3 pipeline
 6380 pipe_desc(S0, S1, S2, S3, S4, S5);
 6381 
 6382 //----------PIPELINE CLASSES---------------------------------------------------
 6383 // Pipeline Classes describe the stages in which input and output are
 6384 // referenced by the hardware pipeline.
 6385 
 6386 // Naming convention: ialu or fpu
 6387 // Then: _reg
 6388 // Then: _reg if there is a 2nd register
 6389 // Then: _long if it's a pair of instructions implementing a long
 6390 // Then: _fat if it requires the big decoder
 6391 //   Or: _mem if it requires the big decoder and a memory unit.
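
// For example, under this convention ialu_cr_reg_mem (defined below) is an
// integer ALU operation whose output is the flags register and whose inputs
// are one register and one memory operand; it therefore needs the big decoder
// (D0) and a memory unit in addition to an ALU.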
 6392 
 6393 // Integer ALU reg operation
 6394 pipe_class ialu_reg(rRegI dst)
 6395 %{
 6396     single_instruction;
 6397     dst    : S4(write);
 6398     dst    : S3(read);
 6399     DECODE : S0;        // any decoder
 6400     ALU    : S3;        // any alu
 6401 %}
 6402 
 6403 // Long ALU reg operation
 6404 pipe_class ialu_reg_long(rRegL dst)
 6405 %{
 6406     instruction_count(2);
 6407     dst    : S4(write);
 6408     dst    : S3(read);
 6409     DECODE : S0(2);     // any 2 decoders
 6410     ALU    : S3(2);     // both alus
 6411 %}
 6412 
 6413 // Integer ALU reg operation using big decoder
 6414 pipe_class ialu_reg_fat(rRegI dst)
 6415 %{
 6416     single_instruction;
 6417     dst    : S4(write);
 6418     dst    : S3(read);
 6419     D0     : S0;        // big decoder only
 6420     ALU    : S3;        // any alu
 6421 %}
 6422 
 6423 // Integer ALU reg-reg operation
 6424 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6425 %{
 6426     single_instruction;
 6427     dst    : S4(write);
 6428     src    : S3(read);
 6429     DECODE : S0;        // any decoder
 6430     ALU    : S3;        // any alu
 6431 %}
 6432 
 6433 // Integer ALU reg-reg operation
 6434 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6435 %{
 6436     single_instruction;
 6437     dst    : S4(write);
 6438     src    : S3(read);
 6439     D0     : S0;        // big decoder only
 6440     ALU    : S3;        // any alu
 6441 %}
 6442 
 6443 // Integer ALU reg-mem operation
 6444 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6445 %{
 6446     single_instruction;
 6447     dst    : S5(write);
 6448     mem    : S3(read);
 6449     D0     : S0;        // big decoder only
 6450     ALU    : S4;        // any alu
 6451     MEM    : S3;        // any mem
 6452 %}
 6453 
 6454 // Integer mem operation (prefetch)
 6455 pipe_class ialu_mem(memory mem)
 6456 %{
 6457     single_instruction;
 6458     mem    : S3(read);
 6459     D0     : S0;        // big decoder only
 6460     MEM    : S3;        // any mem
 6461 %}
 6462 
 6463 // Integer Store to Memory
 6464 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6465 %{
 6466     single_instruction;
 6467     mem    : S3(read);
 6468     src    : S5(read);
 6469     D0     : S0;        // big decoder only
 6470     ALU    : S4;        // any alu
 6471     MEM    : S3;
 6472 %}
 6473 
 6474 // // Long Store to Memory
 6475 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6476 // %{
 6477 //     instruction_count(2);
 6478 //     mem    : S3(read);
 6479 //     src    : S5(read);
 6480 //     D0     : S0(2);          // big decoder only; twice
 6481 //     ALU    : S4(2);     // any 2 alus
 6482 //     MEM    : S3(2);  // Both mems
 6483 // %}
 6484 
 6485 // Integer Store to Memory
 6486 pipe_class ialu_mem_imm(memory mem)
 6487 %{
 6488     single_instruction;
 6489     mem    : S3(read);
 6490     D0     : S0;        // big decoder only
 6491     ALU    : S4;        // any alu
 6492     MEM    : S3;
 6493 %}
 6494 
 6495 // Integer ALU0 reg-reg operation
 6496 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6497 %{
 6498     single_instruction;
 6499     dst    : S4(write);
 6500     src    : S3(read);
 6501     D0     : S0;        // Big decoder only
 6502     ALU0   : S3;        // only alu0
 6503 %}
 6504 
 6505 // Integer ALU0 reg-mem operation
 6506 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6507 %{
 6508     single_instruction;
 6509     dst    : S5(write);
 6510     mem    : S3(read);
 6511     D0     : S0;        // big decoder only
 6512     ALU0   : S4;        // ALU0 only
 6513     MEM    : S3;        // any mem
 6514 %}
 6515 
 6516 // Integer ALU reg-reg operation
 6517 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6518 %{
 6519     single_instruction;
 6520     cr     : S4(write);
 6521     src1   : S3(read);
 6522     src2   : S3(read);
 6523     DECODE : S0;        // any decoder
 6524     ALU    : S3;        // any alu
 6525 %}
 6526 
 6527 // Integer ALU reg-imm operation
 6528 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6529 %{
 6530     single_instruction;
 6531     cr     : S4(write);
 6532     src1   : S3(read);
 6533     DECODE : S0;        // any decoder
 6534     ALU    : S3;        // any alu
 6535 %}
 6536 
 6537 // Integer ALU reg-mem operation
 6538 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6539 %{
 6540     single_instruction;
 6541     cr     : S4(write);
 6542     src1   : S3(read);
 6543     src2   : S3(read);
 6544     D0     : S0;        // big decoder only
 6545     ALU    : S4;        // any alu
 6546     MEM    : S3;
 6547 %}
 6548 
 6549 // Conditional move reg-reg
 6550 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6551 %{
 6552     instruction_count(4);
 6553     y      : S4(read);
 6554     q      : S3(read);
 6555     p      : S3(read);
 6556     DECODE : S0(4);     // any decoder
 6557 %}
 6558 
 6559 // Conditional move reg-reg
 6560 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6561 %{
 6562     single_instruction;
 6563     dst    : S4(write);
 6564     src    : S3(read);
 6565     cr     : S3(read);
 6566     DECODE : S0;        // any decoder
 6567 %}
 6568 
 6569 // Conditional move reg-mem
 6570 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6571 %{
 6572     single_instruction;
 6573     dst    : S4(write);
 6574     src    : S3(read);
 6575     cr     : S3(read);
 6576     DECODE : S0;        // any decoder
 6577     MEM    : S3;
 6578 %}
 6579 
 6580 // Conditional move reg-reg long
 6581 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6582 %{
 6583     single_instruction;
 6584     dst    : S4(write);
 6585     src    : S3(read);
 6586     cr     : S3(read);
 6587     DECODE : S0(2);     // any 2 decoders
 6588 %}
 6589 
 6590 // Float reg-reg operation
 6591 pipe_class fpu_reg(regD dst)
 6592 %{
 6593     instruction_count(2);
 6594     dst    : S3(read);
 6595     DECODE : S0(2);     // any 2 decoders
 6596     FPU    : S3;
 6597 %}
 6598 
 6599 // Float reg-reg operation
 6600 pipe_class fpu_reg_reg(regD dst, regD src)
 6601 %{
 6602     instruction_count(2);
 6603     dst    : S4(write);
 6604     src    : S3(read);
 6605     DECODE : S0(2);     // any 2 decoders
 6606     FPU    : S3;
 6607 %}
 6608 
 6609 // Float reg-reg operation
 6610 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6611 %{
 6612     instruction_count(3);
 6613     dst    : S4(write);
 6614     src1   : S3(read);
 6615     src2   : S3(read);
 6616     DECODE : S0(3);     // any 3 decoders
 6617     FPU    : S3(2);
 6618 %}
 6619 
 6620 // Float reg-reg operation
 6621 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6622 %{
 6623     instruction_count(4);
 6624     dst    : S4(write);
 6625     src1   : S3(read);
 6626     src2   : S3(read);
 6627     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6629     FPU    : S3(2);
 6630 %}
 6631 
 6632 // Float reg-reg operation
 6633 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6634 %{
 6635     instruction_count(4);
 6636     dst    : S4(write);
 6637     src1   : S3(read);
 6638     src2   : S3(read);
 6639     src3   : S3(read);
 6640     DECODE : S1(3);     // any 3 decoders
 6641     D0     : S0;        // Big decoder only
 6642     FPU    : S3(2);
 6643     MEM    : S3;
 6644 %}
 6645 
 6646 // Float reg-mem operation
 6647 pipe_class fpu_reg_mem(regD dst, memory mem)
 6648 %{
 6649     instruction_count(2);
 6650     dst    : S5(write);
 6651     mem    : S3(read);
 6652     D0     : S0;        // big decoder only
 6653     DECODE : S1;        // any decoder for FPU POP
 6654     FPU    : S4;
 6655     MEM    : S3;        // any mem
 6656 %}
 6657 
 6658 // Float reg-mem operation
 6659 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6660 %{
 6661     instruction_count(3);
 6662     dst    : S5(write);
 6663     src1   : S3(read);
 6664     mem    : S3(read);
 6665     D0     : S0;        // big decoder only
 6666     DECODE : S1(2);     // any decoder for FPU POP
 6667     FPU    : S4;
 6668     MEM    : S3;        // any mem
 6669 %}
 6670 
 6671 // Float mem-reg operation
 6672 pipe_class fpu_mem_reg(memory mem, regD src)
 6673 %{
 6674     instruction_count(2);
 6675     src    : S5(read);
 6676     mem    : S3(read);
 6677     DECODE : S0;        // any decoder for FPU PUSH
 6678     D0     : S1;        // big decoder only
 6679     FPU    : S4;
 6680     MEM    : S3;        // any mem
 6681 %}
 6682 
 6683 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6684 %{
 6685     instruction_count(3);
 6686     src1   : S3(read);
 6687     src2   : S3(read);
 6688     mem    : S3(read);
 6689     DECODE : S0(2);     // any decoder for FPU PUSH
 6690     D0     : S1;        // big decoder only
 6691     FPU    : S4;
 6692     MEM    : S3;        // any mem
 6693 %}
 6694 
 6695 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6696 %{
 6697     instruction_count(3);
 6698     src1   : S3(read);
 6699     src2   : S3(read);
 6700     mem    : S4(read);
 6701     DECODE : S0;        // any decoder for FPU PUSH
 6702     D0     : S0(2);     // big decoder only
 6703     FPU    : S4;
 6704     MEM    : S3(2);     // any mem
 6705 %}
 6706 
 6707 pipe_class fpu_mem_mem(memory dst, memory src1)
 6708 %{
 6709     instruction_count(2);
 6710     src1   : S3(read);
 6711     dst    : S4(read);
 6712     D0     : S0(2);     // big decoder only
 6713     MEM    : S3(2);     // any mem
 6714 %}
 6715 
 6716 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6717 %{
 6718     instruction_count(3);
 6719     src1   : S3(read);
 6720     src2   : S3(read);
 6721     dst    : S4(read);
 6722     D0     : S0(3);     // big decoder only
 6723     FPU    : S4;
 6724     MEM    : S3(3);     // any mem
 6725 %}
 6726 
 6727 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6728 %{
 6729     instruction_count(3);
 6730     src1   : S4(read);
 6731     mem    : S4(read);
 6732     DECODE : S0;        // any decoder for FPU PUSH
 6733     D0     : S0(2);     // big decoder only
 6734     FPU    : S4;
 6735     MEM    : S3(2);     // any mem
 6736 %}
 6737 
 6738 // Float load constant
 6739 pipe_class fpu_reg_con(regD dst)
 6740 %{
 6741     instruction_count(2);
 6742     dst    : S5(write);
 6743     D0     : S0;        // big decoder only for the load
 6744     DECODE : S1;        // any decoder for FPU POP
 6745     FPU    : S4;
 6746     MEM    : S3;        // any mem
 6747 %}
 6748 
 6749 // Float load constant
 6750 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6751 %{
 6752     instruction_count(3);
 6753     dst    : S5(write);
 6754     src    : S3(read);
 6755     D0     : S0;        // big decoder only for the load
 6756     DECODE : S1(2);     // any decoder for FPU POP
 6757     FPU    : S4;
 6758     MEM    : S3;        // any mem
 6759 %}
 6760 
 6761 // UnConditional branch
 6762 pipe_class pipe_jmp(label labl)
 6763 %{
 6764     single_instruction;
 6765     BR   : S3;
 6766 %}
 6767 
 6768 // Conditional branch
 6769 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6770 %{
 6771     single_instruction;
 6772     cr    : S1(read);
 6773     BR    : S3;
 6774 %}
 6775 
 6776 // Allocation idiom
 6777 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6778 %{
 6779     instruction_count(1); force_serialization;
 6780     fixed_latency(6);
 6781     heap_ptr : S3(read);
 6782     DECODE   : S0(3);
 6783     D0       : S2;
 6784     MEM      : S3;
 6785     ALU      : S3(2);
 6786     dst      : S5(write);
 6787     BR       : S5;
 6788 %}
 6789 
 6790 // Generic big/slow expanded idiom
 6791 pipe_class pipe_slow()
 6792 %{
 6793     instruction_count(10); multiple_bundles; force_serialization;
 6794     fixed_latency(100);
 6795     D0  : S0(2);
 6796     MEM : S3(2);
 6797 %}
 6798 
 6799 // The real do-nothing guy
 6800 pipe_class empty()
 6801 %{
 6802     instruction_count(0);
 6803 %}
 6804 
 6805 // Define the class for the Nop node
 6806 define
 6807 %{
 6808    MachNop = empty;
 6809 %}
 6810 
 6811 %}
 6812 
 6813 //----------INSTRUCTIONS-------------------------------------------------------
 6814 //
 6815 // match      -- States which machine-independent subtree may be replaced
 6816 //               by this instruction.
 6817 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6818 //               selection to identify a minimum cost tree of machine
 6819 //               instructions that matches a tree of machine-independent
 6820 //               instructions.
 6821 // format     -- A string providing the disassembly for this instruction.
 6822 //               The value of an instruction's operand may be inserted
 6823 //               by referring to it with a '$' prefix.
 6824 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6825 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6827 //               indicate the type of machine instruction, while secondary
 6828 //               and tertiary are often used for prefix options or addressing
 6829 //               modes.
 6830 // ins_encode -- A list of encode classes with parameters. The encode class
 6831 //               name must have been defined in an 'enc_class' specification
 6832 //               in the encode section of the architecture description.
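//
// For example (an illustrative sketch only; the real rules follow below), a
// minimal register-to-register integer move could be written as:
//
//   instruct exampleMovI(rRegI dst, rRegI src) %{
//     match(Set dst src);
//     ins_cost(100);
//     format %{ "movl    $dst, $src\t# example" %}
//     ins_encode %{
//       __ movl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}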
 6833 
 6834 // ============================================================================
 6835 
 6836 instruct ShouldNotReachHere() %{
 6837   match(Halt);
 6838   format %{ "stop\t# ShouldNotReachHere" %}
 6839   ins_encode %{
 6840     if (is_reachable()) {
 6841       const char* str = __ code_string(_halt_reason);
 6842       __ stop(str);
 6843     }
 6844   %}
 6845   ins_pipe(pipe_slow);
 6846 %}
 6847 
 6848 // ============================================================================
 6849 
 6850 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6851 // Load Float
 6852 instruct MoveF2VL(vlRegF dst, regF src) %{
 6853   match(Set dst src);
 6854   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6855   ins_encode %{
 6856     ShouldNotReachHere();
 6857   %}
 6858   ins_pipe( fpu_reg_reg );
 6859 %}
 6860 
 6861 // Load Float
 6862 instruct MoveF2LEG(legRegF dst, regF src) %{
 6863   match(Set dst src);
 6864   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6865   ins_encode %{
 6866     ShouldNotReachHere();
 6867   %}
 6868   ins_pipe( fpu_reg_reg );
 6869 %}
 6870 
 6871 // Load Float
 6872 instruct MoveVL2F(regF dst, vlRegF src) %{
 6873   match(Set dst src);
 6874   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6875   ins_encode %{
 6876     ShouldNotReachHere();
 6877   %}
 6878   ins_pipe( fpu_reg_reg );
 6879 %}
 6880 
 6881 // Load Float
 6882 instruct MoveLEG2F(regF dst, legRegF src) %{
 6883   match(Set dst src);
 6884   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6885   ins_encode %{
 6886     ShouldNotReachHere();
 6887   %}
 6888   ins_pipe( fpu_reg_reg );
 6889 %}
 6890 
 6891 // Load Double
 6892 instruct MoveD2VL(vlRegD dst, regD src) %{
 6893   match(Set dst src);
 6894   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6895   ins_encode %{
 6896     ShouldNotReachHere();
 6897   %}
 6898   ins_pipe( fpu_reg_reg );
 6899 %}
 6900 
 6901 // Load Double
 6902 instruct MoveD2LEG(legRegD dst, regD src) %{
 6903   match(Set dst src);
 6904   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6905   ins_encode %{
 6906     ShouldNotReachHere();
 6907   %}
 6908   ins_pipe( fpu_reg_reg );
 6909 %}
 6910 
 6911 // Load Double
 6912 instruct MoveVL2D(regD dst, vlRegD src) %{
 6913   match(Set dst src);
 6914   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6915   ins_encode %{
 6916     ShouldNotReachHere();
 6917   %}
 6918   ins_pipe( fpu_reg_reg );
 6919 %}
 6920 
 6921 // Load Double
 6922 instruct MoveLEG2D(regD dst, legRegD src) %{
 6923   match(Set dst src);
 6924   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6925   ins_encode %{
 6926     ShouldNotReachHere();
 6927   %}
 6928   ins_pipe( fpu_reg_reg );
 6929 %}
 6930 
 6931 //----------Load/Store/Move Instructions---------------------------------------
 6932 //----------Load Instructions--------------------------------------------------
 6933 
 6934 // Load Byte (8 bit signed)
 6935 instruct loadB(rRegI dst, memory mem)
 6936 %{
 6937   match(Set dst (LoadB mem));
 6938 
 6939   ins_cost(125);
 6940   format %{ "movsbl  $dst, $mem\t# byte" %}
 6941 
 6942   ins_encode %{
 6943     __ movsbl($dst$$Register, $mem$$Address);
 6944   %}
 6945 
 6946   ins_pipe(ialu_reg_mem);
 6947 %}
 6948 
 6949 // Load Byte (8 bit signed) into Long Register
 6950 instruct loadB2L(rRegL dst, memory mem)
 6951 %{
 6952   match(Set dst (ConvI2L (LoadB mem)));
 6953 
 6954   ins_cost(125);
 6955   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6956 
 6957   ins_encode %{
 6958     __ movsbq($dst$$Register, $mem$$Address);
 6959   %}
 6960 
 6961   ins_pipe(ialu_reg_mem);
 6962 %}
 6963 
 6964 // Load Unsigned Byte (8 bit UNsigned)
 6965 instruct loadUB(rRegI dst, memory mem)
 6966 %{
 6967   match(Set dst (LoadUB mem));
 6968 
 6969   ins_cost(125);
 6970   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6971 
 6972   ins_encode %{
 6973     __ movzbl($dst$$Register, $mem$$Address);
 6974   %}
 6975 
 6976   ins_pipe(ialu_reg_mem);
 6977 %}
 6978 
 6979 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6980 instruct loadUB2L(rRegL dst, memory mem)
 6981 %{
 6982   match(Set dst (ConvI2L (LoadUB mem)));
 6983 
 6984   ins_cost(125);
 6985   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6986 
 6987   ins_encode %{
 6988     __ movzbq($dst$$Register, $mem$$Address);
 6989   %}
 6990 
 6991   ins_pipe(ialu_reg_mem);
 6992 %}
 6993 
 6994 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6995 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6996   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6997   effect(KILL cr);
 6998 
 6999   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 7000             "andl    $dst, right_n_bits($mask, 8)" %}
 7001   ins_encode %{
 7002     Register Rdst = $dst$$Register;
 7003     __ movzbq(Rdst, $mem$$Address);
 7004     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 7005   %}
 7006   ins_pipe(ialu_reg_mem);
 7007 %}
 7008 
 7009 // Load Short (16 bit signed)
 7010 instruct loadS(rRegI dst, memory mem)
 7011 %{
 7012   match(Set dst (LoadS mem));
 7013 
 7014   ins_cost(125);
 7015   format %{ "movswl $dst, $mem\t# short" %}
 7016 
 7017   ins_encode %{
 7018     __ movswl($dst$$Register, $mem$$Address);
 7019   %}
 7020 
 7021   ins_pipe(ialu_reg_mem);
 7022 %}
 7023 
 7024 // Load Short (16 bit signed) to Byte (8 bit signed)
 7025 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7026   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 7027 
 7028   ins_cost(125);
 7029   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 7030   ins_encode %{
 7031     __ movsbl($dst$$Register, $mem$$Address);
 7032   %}
 7033   ins_pipe(ialu_reg_mem);
 7034 %}
 7035 
 7036 // Load Short (16 bit signed) into Long Register
 7037 instruct loadS2L(rRegL dst, memory mem)
 7038 %{
 7039   match(Set dst (ConvI2L (LoadS mem)));
 7040 
 7041   ins_cost(125);
 7042   format %{ "movswq $dst, $mem\t# short -> long" %}
 7043 
 7044   ins_encode %{
 7045     __ movswq($dst$$Register, $mem$$Address);
 7046   %}
 7047 
 7048   ins_pipe(ialu_reg_mem);
 7049 %}
 7050 
 7051 // Load Unsigned Short/Char (16 bit UNsigned)
 7052 instruct loadUS(rRegI dst, memory mem)
 7053 %{
 7054   match(Set dst (LoadUS mem));
 7055 
 7056   ins_cost(125);
 7057   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7058 
 7059   ins_encode %{
 7060     __ movzwl($dst$$Register, $mem$$Address);
 7061   %}
 7062 
 7063   ins_pipe(ialu_reg_mem);
 7064 %}
 7065 
 7066 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7067 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7068   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7069 
 7070   ins_cost(125);
 7071   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7072   ins_encode %{
 7073     __ movsbl($dst$$Register, $mem$$Address);
 7074   %}
 7075   ins_pipe(ialu_reg_mem);
 7076 %}
 7077 
 7078 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7079 instruct loadUS2L(rRegL dst, memory mem)
 7080 %{
 7081   match(Set dst (ConvI2L (LoadUS mem)));
 7082 
 7083   ins_cost(125);
 7084   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7085 
 7086   ins_encode %{
 7087     __ movzwq($dst$$Register, $mem$$Address);
 7088   %}
 7089 
 7090   ins_pipe(ialu_reg_mem);
 7091 %}
 7092 
 7093 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7094 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7095   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7096 
 7097   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7098   ins_encode %{
 7099     __ movzbq($dst$$Register, $mem$$Address);
 7100   %}
 7101   ins_pipe(ialu_reg_mem);
 7102 %}
 7103 
 7104 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7105 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7106   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7107   effect(KILL cr);
 7108 
 7109   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7110             "andl    $dst, right_n_bits($mask, 16)" %}
 7111   ins_encode %{
 7112     Register Rdst = $dst$$Register;
 7113     __ movzwq(Rdst, $mem$$Address);
 7114     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7115   %}
 7116   ins_pipe(ialu_reg_mem);
 7117 %}
 7118 
 7119 // Load Integer
 7120 instruct loadI(rRegI dst, memory mem)
 7121 %{
 7122   match(Set dst (LoadI mem));
 7123 
 7124   ins_cost(125);
 7125   format %{ "movl    $dst, $mem\t# int" %}
 7126 
 7127   ins_encode %{
 7128     __ movl($dst$$Register, $mem$$Address);
 7129   %}
 7130 
 7131   ins_pipe(ialu_reg_mem);
 7132 %}
 7133 
 7134 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7135 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7136   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7137 
 7138   ins_cost(125);
 7139   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7140   ins_encode %{
 7141     __ movsbl($dst$$Register, $mem$$Address);
 7142   %}
 7143   ins_pipe(ialu_reg_mem);
 7144 %}
 7145 
 7146 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7147 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7148   match(Set dst (AndI (LoadI mem) mask));
 7149 
 7150   ins_cost(125);
 7151   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7152   ins_encode %{
 7153     __ movzbl($dst$$Register, $mem$$Address);
 7154   %}
 7155   ins_pipe(ialu_reg_mem);
 7156 %}
 7157 
 7158 // Load Integer (32 bit signed) to Short (16 bit signed)
 7159 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7160   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7161 
 7162   ins_cost(125);
 7163   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7164   ins_encode %{
 7165     __ movswl($dst$$Register, $mem$$Address);
 7166   %}
 7167   ins_pipe(ialu_reg_mem);
 7168 %}
 7169 
 7170 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7171 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7172   match(Set dst (AndI (LoadI mem) mask));
 7173 
 7174   ins_cost(125);
 7175   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7176   ins_encode %{
 7177     __ movzwl($dst$$Register, $mem$$Address);
 7178   %}
 7179   ins_pipe(ialu_reg_mem);
 7180 %}
 7181 
 7182 // Load Integer into Long Register
 7183 instruct loadI2L(rRegL dst, memory mem)
 7184 %{
 7185   match(Set dst (ConvI2L (LoadI mem)));
 7186 
 7187   ins_cost(125);
 7188   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7189 
 7190   ins_encode %{
 7191     __ movslq($dst$$Register, $mem$$Address);
 7192   %}
 7193 
 7194   ins_pipe(ialu_reg_mem);
 7195 %}
 7196 
 7197 // Load Integer with mask 0xFF into Long Register
 7198 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7199   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7200 
 7201   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7202   ins_encode %{
 7203     __ movzbq($dst$$Register, $mem$$Address);
 7204   %}
 7205   ins_pipe(ialu_reg_mem);
 7206 %}
 7207 
 7208 // Load Integer with mask 0xFFFF into Long Register
 7209 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7210   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7211 
 7212   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7213   ins_encode %{
 7214     __ movzwq($dst$$Register, $mem$$Address);
 7215   %}
 7216   ins_pipe(ialu_reg_mem);
 7217 %}
 7218 
 7219 // Load Integer with a 31-bit mask into Long Register
 7220 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7221   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7222   effect(KILL cr);
 7223 
 7224   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7225             "andl    $dst, $mask" %}
 7226   ins_encode %{
 7227     Register Rdst = $dst$$Register;
 7228     __ movl(Rdst, $mem$$Address);
 7229     __ andl(Rdst, $mask$$constant);
 7230   %}
 7231   ins_pipe(ialu_reg_mem);
 7232 %}
 7233 
 7234 // Load Unsigned Integer into Long Register
 7235 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7236 %{
 7237   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7238 
 7239   ins_cost(125);
 7240   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7241 
 7242   ins_encode %{
 7243     __ movl($dst$$Register, $mem$$Address);
 7244   %}
 7245 
 7246   ins_pipe(ialu_reg_mem);
 7247 %}
 7248 
 7249 // Load Long
 7250 instruct loadL(rRegL dst, memory mem)
 7251 %{
 7252   match(Set dst (LoadL mem));
 7253 
 7254   ins_cost(125);
 7255   format %{ "movq    $dst, $mem\t# long" %}
 7256 
 7257   ins_encode %{
 7258     __ movq($dst$$Register, $mem$$Address);
 7259   %}
 7260 
 7261   ins_pipe(ialu_reg_mem); // XXX
 7262 %}
 7263 
 7264 // Load Range
 7265 instruct loadRange(rRegI dst, memory mem)
 7266 %{
 7267   match(Set dst (LoadRange mem));
 7268 
 7269   ins_cost(125); // XXX
 7270   format %{ "movl    $dst, $mem\t# range" %}
 7271   ins_encode %{
 7272     __ movl($dst$$Register, $mem$$Address);
 7273   %}
 7274   ins_pipe(ialu_reg_mem);
 7275 %}
 7276 
 7277 // Load Pointer
 7278 instruct loadP(rRegP dst, memory mem)
 7279 %{
 7280   match(Set dst (LoadP mem));
 7281   predicate(n->as_Load()->barrier_data() == 0);
 7282 
 7283   ins_cost(125); // XXX
 7284   format %{ "movq    $dst, $mem\t# ptr" %}
 7285   ins_encode %{
 7286     __ movq($dst$$Register, $mem$$Address);
 7287   %}
 7288   ins_pipe(ialu_reg_mem); // XXX
 7289 %}
 7290 
 7291 // Load Compressed Pointer
 7292 instruct loadN(rRegN dst, memory mem)
 7293 %{
 7294    predicate(n->as_Load()->barrier_data() == 0);
 7295    match(Set dst (LoadN mem));
 7296 
 7297    ins_cost(125); // XXX
 7298    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7299    ins_encode %{
 7300      __ movl($dst$$Register, $mem$$Address);
 7301    %}
 7302    ins_pipe(ialu_reg_mem); // XXX
 7303 %}
 7304 
 7305 
 7306 // Load Klass Pointer
 7307 instruct loadKlass(rRegP dst, memory mem)
 7308 %{
 7309   match(Set dst (LoadKlass mem));
 7310 
 7311   ins_cost(125); // XXX
 7312   format %{ "movq    $dst, $mem\t# class" %}
 7313   ins_encode %{
 7314     __ movq($dst$$Register, $mem$$Address);
 7315   %}
 7316   ins_pipe(ialu_reg_mem); // XXX
 7317 %}
 7318 
 7319 // Load narrow Klass Pointer
 7320 instruct loadNKlass(rRegN dst, memory mem)
 7321 %{
 7322   predicate(!UseCompactObjectHeaders);
 7323   match(Set dst (LoadNKlass mem));
 7324 
 7325   ins_cost(125); // XXX
 7326   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7327   ins_encode %{
 7328     __ movl($dst$$Register, $mem$$Address);
 7329   %}
 7330   ins_pipe(ialu_reg_mem); // XXX
 7331 %}
 7332 
 7333 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7334 %{
 7335   predicate(UseCompactObjectHeaders);
 7336   match(Set dst (LoadNKlass mem));
 7337   effect(KILL cr);
 7338   ins_cost(125);
 7339   format %{
 7340     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7341     "shrl    $dst, markWord::klass_shift_at_offset"
 7342   %}
 7343   ins_encode %{
 7344     if (UseAPX) {
 7345       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7346     } else {
 7348       __ movl($dst$$Register, $mem$$Address);
 7349       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7350     }
 7351   %}
 7352   ins_pipe(ialu_reg_mem);
 7353 %}
 7354 
 7355 // Load Float
 7356 instruct loadF(regF dst, memory mem)
 7357 %{
 7358   match(Set dst (LoadF mem));
 7359 
 7360   ins_cost(145); // XXX
 7361   format %{ "movss   $dst, $mem\t# float" %}
 7362   ins_encode %{
 7363     __ movflt($dst$$XMMRegister, $mem$$Address);
 7364   %}
 7365   ins_pipe(pipe_slow); // XXX
 7366 %}
 7367 
 7368 // Load Double
 7369 instruct loadD_partial(regD dst, memory mem)
 7370 %{
 7371   predicate(!UseXmmLoadAndClearUpper);
 7372   match(Set dst (LoadD mem));
 7373 
 7374   ins_cost(145); // XXX
 7375   format %{ "movlpd  $dst, $mem\t# double" %}
 7376   ins_encode %{
 7377     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7378   %}
 7379   ins_pipe(pipe_slow); // XXX
 7380 %}
 7381 
 7382 instruct loadD(regD dst, memory mem)
 7383 %{
 7384   predicate(UseXmmLoadAndClearUpper);
 7385   match(Set dst (LoadD mem));
 7386 
 7387   ins_cost(145); // XXX
 7388   format %{ "movsd   $dst, $mem\t# double" %}
 7389   ins_encode %{
 7390     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7391   %}
 7392   ins_pipe(pipe_slow); // XXX
 7393 %}
 7394 
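      // Note: Java's Math.max/min must propagate NaN and order -0.0 below +0.0.
      // Bare maxss/minss do not implement those rules (they return the second
      // source operand for NaN and equal-zero inputs), so the non-AVX10.2 forms
      // below go through vminmax_fp / emit_fp_min_max with extra temporaries.
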
 7395 // max = java.lang.Math.max(float a, float b)
 7396 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7397   predicate(VM_Version::supports_avx10_2());
 7398   match(Set dst (MaxF a b));
 7399   format %{ "maxF $dst, $a, $b" %}
 7400   ins_encode %{
 7401     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7402   %}
 7403   ins_pipe( pipe_slow );
 7404 %}
 7405 
 7406 // max = java.lang.Math.max(float a, float b)
 7407 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7408   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7409   match(Set dst (MaxF a b));
 7410   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7411   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7412   ins_encode %{
 7413     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7414   %}
 7415   ins_pipe( pipe_slow );
 7416 %}
 7417 
 7418 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7419   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7420   match(Set dst (MaxF a b));
 7421   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7422 
 7423   format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7424   ins_encode %{
 7425     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7426                     false /*min*/, true /*single*/);
 7427   %}
 7428   ins_pipe( pipe_slow );
 7429 %}
 7430 
 7431 // max = java.lang.Math.max(double a, double b)
 7432 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7433   predicate(VM_Version::supports_avx10_2());
 7434   match(Set dst (MaxD a b));
 7435   format %{ "maxD $dst, $a, $b" %}
 7436   ins_encode %{
 7437     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7438   %}
 7439   ins_pipe( pipe_slow );
 7440 %}
 7441 
 7442 // max = java.lang.Math.max(double a, double b)
 7443 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7444   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7445   match(Set dst (MaxD a b));
 7446   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7447   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7448   ins_encode %{
 7449     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7450   %}
 7451   ins_pipe( pipe_slow );
 7452 %}
 7453 
 7454 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7455   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7456   match(Set dst (MaxD a b));
 7457   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7458 
 7459   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7460   ins_encode %{
 7461     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7462                     false /*min*/, false /*single*/);
 7463   %}
 7464   ins_pipe( pipe_slow );
 7465 %}
 7466 
 7467 // min = java.lang.Math.min(float a, float b)
 7468 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7469   predicate(VM_Version::supports_avx10_2());
 7470   match(Set dst (MinF a b));
 7471   format %{ "minF $dst, $a, $b" %}
 7472   ins_encode %{
 7473     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7474   %}
 7475   ins_pipe( pipe_slow );
 7476 %}
 7477 
 7478 // min = java.lang.Math.min(float a, float b)
 7479 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7480   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7481   match(Set dst (MinF a b));
 7482   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7483   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7484   ins_encode %{
 7485     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7486   %}
 7487   ins_pipe( pipe_slow );
 7488 %}
 7489 
 7490 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7491   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7492   match(Set dst (MinF a b));
 7493   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7494 
 7495   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7496   ins_encode %{
 7497     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7498                     true /*min*/, true /*single*/);
 7499   %}
 7500   ins_pipe( pipe_slow );
 7501 %}
 7502 
 7503 // min = java.lang.Math.min(double a, double b)
 7504 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7505   predicate(VM_Version::supports_avx10_2());
 7506   match(Set dst (MinD a b));
 7507   format %{ "minD $dst, $a, $b" %}
 7508   ins_encode %{
 7509     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7510   %}
 7511   ins_pipe( pipe_slow );
 7512 %}
 7513 
 7514 // min = java.lang.Math.min(double a, double b)
 7515 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7516   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7517   match(Set dst (MinD a b));
 7518   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7519   format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7520   ins_encode %{
 7521     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7522   %}
 7523   ins_pipe( pipe_slow );
 7524 %}
 7525 
 7526 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7527   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7528   match(Set dst (MinD a b));
 7529   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7530 
 7531   format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7532   ins_encode %{
 7533     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7534                     true /*min*/, false /*single*/);
 7535   %}
 7536   ins_pipe( pipe_slow );
 7537 %}
 7538 
 7539 // Load Effective Address
 7540 instruct leaP8(rRegP dst, indOffset8 mem)
 7541 %{
 7542   match(Set dst mem);
 7543 
 7544   ins_cost(110); // XXX
 7545   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7546   ins_encode %{
 7547     __ leaq($dst$$Register, $mem$$Address);
 7548   %}
 7549   ins_pipe(ialu_reg_reg_fat);
 7550 %}
 7551 
 7552 instruct leaP32(rRegP dst, indOffset32 mem)
 7553 %{
 7554   match(Set dst mem);
 7555 
 7556   ins_cost(110);
 7557   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7558   ins_encode %{
 7559     __ leaq($dst$$Register, $mem$$Address);
 7560   %}
 7561   ins_pipe(ialu_reg_reg_fat);
 7562 %}
 7563 
 7564 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7565 %{
 7566   match(Set dst mem);
 7567 
 7568   ins_cost(110);
 7569   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7570   ins_encode %{
 7571     __ leaq($dst$$Register, $mem$$Address);
 7572   %}
 7573   ins_pipe(ialu_reg_reg_fat);
 7574 %}
 7575 
 7576 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7577 %{
 7578   match(Set dst mem);
 7579 
 7580   ins_cost(110);
 7581   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7582   ins_encode %{
 7583     __ leaq($dst$$Register, $mem$$Address);
 7584   %}
 7585   ins_pipe(ialu_reg_reg_fat);
 7586 %}
 7587 
 7588 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7589 %{
 7590   match(Set dst mem);
 7591 
 7592   ins_cost(110);
 7593   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7594   ins_encode %{
 7595     __ leaq($dst$$Register, $mem$$Address);
 7596   %}
 7597   ins_pipe(ialu_reg_reg_fat);
 7598 %}
 7599 
 7600 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7601 %{
 7602   match(Set dst mem);
 7603 
 7604   ins_cost(110);
 7605   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7606   ins_encode %{
 7607     __ leaq($dst$$Register, $mem$$Address);
 7608   %}
 7609   ins_pipe(ialu_reg_reg_fat);
 7610 %}
 7611 
 7612 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7613 %{
 7614   match(Set dst mem);
 7615 
 7616   ins_cost(110);
 7617   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7618   ins_encode %{
 7619     __ leaq($dst$$Register, $mem$$Address);
 7620   %}
 7621   ins_pipe(ialu_reg_reg_fat);
 7622 %}
 7623 
 7624 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7625 %{
 7626   match(Set dst mem);
 7627 
 7628   ins_cost(110);
 7629   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7630   ins_encode %{
 7631     __ leaq($dst$$Register, $mem$$Address);
 7632   %}
 7633   ins_pipe(ialu_reg_reg_fat);
 7634 %}
 7635 
 7636 // Load Effective Address which uses Narrow (32-bits) oop
 7637 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7638 %{
 7639   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7640   match(Set dst mem);
 7641 
 7642   ins_cost(110);
 7643   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7644   ins_encode %{
 7645     __ leaq($dst$$Register, $mem$$Address);
 7646   %}
 7647   ins_pipe(ialu_reg_reg_fat);
 7648 %}
 7649 
 7650 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7651 %{
 7652   predicate(CompressedOops::shift() == 0);
 7653   match(Set dst mem);
 7654 
 7655   ins_cost(110); // XXX
 7656   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7657   ins_encode %{
 7658     __ leaq($dst$$Register, $mem$$Address);
 7659   %}
 7660   ins_pipe(ialu_reg_reg_fat);
 7661 %}
 7662 
 7663 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7664 %{
 7665   predicate(CompressedOops::shift() == 0);
 7666   match(Set dst mem);
 7667 
 7668   ins_cost(110);
 7669   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7670   ins_encode %{
 7671     __ leaq($dst$$Register, $mem$$Address);
 7672   %}
 7673   ins_pipe(ialu_reg_reg_fat);
 7674 %}
 7675 
 7676 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7677 %{
 7678   predicate(CompressedOops::shift() == 0);
 7679   match(Set dst mem);
 7680 
 7681   ins_cost(110);
 7682   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7683   ins_encode %{
 7684     __ leaq($dst$$Register, $mem$$Address);
 7685   %}
 7686   ins_pipe(ialu_reg_reg_fat);
 7687 %}
 7688 
 7689 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7690 %{
 7691   predicate(CompressedOops::shift() == 0);
 7692   match(Set dst mem);
 7693 
 7694   ins_cost(110);
 7695   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7696   ins_encode %{
 7697     __ leaq($dst$$Register, $mem$$Address);
 7698   %}
 7699   ins_pipe(ialu_reg_reg_fat);
 7700 %}
 7701 
 7702 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7703 %{
 7704   predicate(CompressedOops::shift() == 0);
 7705   match(Set dst mem);
 7706 
 7707   ins_cost(110);
 7708   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7709   ins_encode %{
 7710     __ leaq($dst$$Register, $mem$$Address);
 7711   %}
 7712   ins_pipe(ialu_reg_reg_fat);
 7713 %}
 7714 
 7715 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7716 %{
 7717   predicate(CompressedOops::shift() == 0);
 7718   match(Set dst mem);
 7719 
 7720   ins_cost(110);
 7721   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7722   ins_encode %{
 7723     __ leaq($dst$$Register, $mem$$Address);
 7724   %}
 7725   ins_pipe(ialu_reg_reg_fat);
 7726 %}
 7727 
 7728 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7729 %{
 7730   predicate(CompressedOops::shift() == 0);
 7731   match(Set dst mem);
 7732 
 7733   ins_cost(110);
 7734   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7735   ins_encode %{
 7736     __ leaq($dst$$Register, $mem$$Address);
 7737   %}
 7738   ins_pipe(ialu_reg_reg_fat);
 7739 %}
 7740 
 7741 instruct loadConI(rRegI dst, immI src)
 7742 %{
 7743   match(Set dst src);
 7744 
 7745   format %{ "movl    $dst, $src\t# int" %}
 7746   ins_encode %{
 7747     __ movl($dst$$Register, $src$$constant);
 7748   %}
 7749   ins_pipe(ialu_reg_fat); // XXX
 7750 %}
 7751 
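      // A 32-bit xor clears the full 64-bit register (writes to a 32-bit
      // register zero-extend), and it is the shortest way to load zero, but it
      // clobbers the condition codes; hence the KILL cr effect on the
      // zero-loading forms below.
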
 7752 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7753 %{
 7754   match(Set dst src);
 7755   effect(KILL cr);
 7756 
 7757   ins_cost(50);
 7758   format %{ "xorl    $dst, $dst\t# int" %}
 7759   ins_encode %{
 7760     __ xorl($dst$$Register, $dst$$Register);
 7761   %}
 7762   ins_pipe(ialu_reg);
 7763 %}
 7764 
 7765 instruct loadConL(rRegL dst, immL src)
 7766 %{
 7767   match(Set dst src);
 7768 
 7769   ins_cost(150);
 7770   format %{ "movq    $dst, $src\t# long" %}
 7771   ins_encode %{
 7772     __ mov64($dst$$Register, $src$$constant);
 7773   %}
 7774   ins_pipe(ialu_reg);
 7775 %}
 7776 
 7777 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7778 %{
 7779   match(Set dst src);
 7780   effect(KILL cr);
 7781 
 7782   ins_cost(50);
 7783   format %{ "xorl    $dst, $dst\t# long" %}
 7784   ins_encode %{
 7785     __ xorl($dst$$Register, $dst$$Register);
 7786   %}
 7787   ins_pipe(ialu_reg); // XXX
 7788 %}
 7789 
 7790 instruct loadConUL32(rRegL dst, immUL32 src)
 7791 %{
 7792   match(Set dst src);
 7793 
 7794   ins_cost(60);
 7795   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7796   ins_encode %{
 7797     __ movl($dst$$Register, $src$$constant);
 7798   %}
 7799   ins_pipe(ialu_reg);
 7800 %}
 7801 
 7802 instruct loadConL32(rRegL dst, immL32 src)
 7803 %{
 7804   match(Set dst src);
 7805 
 7806   ins_cost(70);
 7807   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7808   ins_encode %{
 7809     __ movq($dst$$Register, $src$$constant);
 7810   %}
 7811   ins_pipe(ialu_reg);
 7812 %}
 7813 
 7814 instruct loadConP(rRegP dst, immP con) %{
 7815   match(Set dst con);
 7816 
 7817   format %{ "movq    $dst, $con\t# ptr" %}
 7818   ins_encode %{
 7819     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7820   %}
 7821   ins_pipe(ialu_reg_fat); // XXX
 7822 %}
 7823 
 7824 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7825 %{
 7826   match(Set dst src);
 7827   effect(KILL cr);
 7828 
 7829   ins_cost(50);
 7830   format %{ "xorl    $dst, $dst\t# ptr" %}
 7831   ins_encode %{
 7832     __ xorl($dst$$Register, $dst$$Register);
 7833   %}
 7834   ins_pipe(ialu_reg);
 7835 %}
 7836 
 7837 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7838 %{
 7839   match(Set dst src);
 7840   effect(KILL cr);
 7841 
 7842   ins_cost(60);
 7843   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7844   ins_encode %{
 7845     __ movl($dst$$Register, $src$$constant);
 7846   %}
 7847   ins_pipe(ialu_reg);
 7848 %}
 7849 
 7850 instruct loadConF(regF dst, immF con) %{
 7851   match(Set dst con);
 7852   ins_cost(125);
 7853   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7854   ins_encode %{
 7855     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7856   %}
 7857   ins_pipe(pipe_slow);
 7858 %}
 7859 
 7860 instruct loadConH(regF dst, immH con) %{
 7861   match(Set dst con);
 7862   ins_cost(125);
 7863   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7864   ins_encode %{
 7865     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7866   %}
 7867   ins_pipe(pipe_slow);
 7868 %}
 7869 
 7870 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7871   match(Set dst src);
 7872   effect(KILL cr);
 7873   format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7874   ins_encode %{
 7875     __ xorq($dst$$Register, $dst$$Register);
 7876   %}
 7877   ins_pipe(ialu_reg);
 7878 %}
 7879 
 7880 instruct loadConN(rRegN dst, immN src) %{
 7881   match(Set dst src);
 7882 
 7883   ins_cost(125);
 7884   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7885   ins_encode %{
 7886     address con = (address)$src$$constant;
 7887     if (con == nullptr) {
 7888       ShouldNotReachHere();
 7889     } else {
 7890       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7891     }
 7892   %}
 7893   ins_pipe(ialu_reg_fat); // XXX
 7894 %}
 7895 
 7896 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7897   match(Set dst src);
 7898 
 7899   ins_cost(125);
 7900   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7901   ins_encode %{
 7902     address con = (address)$src$$constant;
 7903     if (con == nullptr) {
 7904       ShouldNotReachHere();
 7905     } else {
 7906       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7907     }
 7908   %}
 7909   ins_pipe(ialu_reg_fat); // XXX
 7910 %}
 7911 
 7912 instruct loadConF0(regF dst, immF0 src)
 7913 %{
 7914   match(Set dst src);
 7915   ins_cost(100);
 7916 
 7917   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7918   ins_encode %{
 7919     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7920   %}
 7921   ins_pipe(pipe_slow);
 7922 %}
 7923 
 7924 // Use the same format since predicate() cannot be used here.
 7925 instruct loadConD(regD dst, immD con) %{
 7926   match(Set dst con);
 7927   ins_cost(125);
 7928   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7929   ins_encode %{
 7930     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7931   %}
 7932   ins_pipe(pipe_slow);
 7933 %}
 7934 
 7935 instruct loadConD0(regD dst, immD0 src)
 7936 %{
 7937   match(Set dst src);
 7938   ins_cost(100);
 7939 
 7940   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7941   ins_encode %{
 7942     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7943   %}
 7944   ins_pipe(pipe_slow);
 7945 %}
 7946 
 7947 instruct loadSSI(rRegI dst, stackSlotI src)
 7948 %{
 7949   match(Set dst src);
 7950 
 7951   ins_cost(125);
 7952   format %{ "movl    $dst, $src\t# int stk" %}
 7953   ins_encode %{
 7954     __ movl($dst$$Register, $src$$Address);
 7955   %}
 7956   ins_pipe(ialu_reg_mem);
 7957 %}
 7958 
 7959 instruct loadSSL(rRegL dst, stackSlotL src)
 7960 %{
 7961   match(Set dst src);
 7962 
 7963   ins_cost(125);
 7964   format %{ "movq    $dst, $src\t# long stk" %}
 7965   ins_encode %{
 7966     __ movq($dst$$Register, $src$$Address);
 7967   %}
 7968   ins_pipe(ialu_reg_mem);
 7969 %}
 7970 
 7971 instruct loadSSP(rRegP dst, stackSlotP src)
 7972 %{
 7973   match(Set dst src);
 7974 
 7975   ins_cost(125);
 7976   format %{ "movq    $dst, $src\t# ptr stk" %}
 7977   ins_encode %{
 7978     __ movq($dst$$Register, $src$$Address);
 7979   %}
 7980   ins_pipe(ialu_reg_mem);
 7981 %}
 7982 
 7983 instruct loadSSF(regF dst, stackSlotF src)
 7984 %{
 7985   match(Set dst src);
 7986 
 7987   ins_cost(125);
 7988   format %{ "movss   $dst, $src\t# float stk" %}
 7989   ins_encode %{
 7990     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7991   %}
 7992   ins_pipe(pipe_slow); // XXX
 7993 %}
 7994 
 7995 // Use the same format since predicate() cannot be used here.
 7996 instruct loadSSD(regD dst, stackSlotD src)
 7997 %{
 7998   match(Set dst src);
 7999 
 8000   ins_cost(125);
 8001   format %{ "movsd   $dst, $src\t# double stk" %}
 8002   ins_encode  %{
 8003     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 8004   %}
 8005   ins_pipe(pipe_slow); // XXX
 8006 %}
 8007 
 8008 // Prefetch instructions for allocation.
 8009 // Must be safe to execute with invalid address (cannot fault).
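      // The AllocatePrefetchInstr flag selects which flavor is matched below:
      // 0 = prefetchnta, 1 = prefetcht0, 2 = prefetcht2, 3 = prefetchw.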
 8010 
 8011 instruct prefetchAlloc( memory mem ) %{
 8012   predicate(AllocatePrefetchInstr==3);
 8013   match(PrefetchAllocation mem);
 8014   ins_cost(125);
 8015 
 8016   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 8017   ins_encode %{
 8018     __ prefetchw($mem$$Address);
 8019   %}
 8020   ins_pipe(ialu_mem);
 8021 %}
 8022 
 8023 instruct prefetchAllocNTA( memory mem ) %{
 8024   predicate(AllocatePrefetchInstr==0);
 8025   match(PrefetchAllocation mem);
 8026   ins_cost(125);
 8027 
 8028   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 8029   ins_encode %{
 8030     __ prefetchnta($mem$$Address);
 8031   %}
 8032   ins_pipe(ialu_mem);
 8033 %}
 8034 
 8035 instruct prefetchAllocT0( memory mem ) %{
 8036   predicate(AllocatePrefetchInstr==1);
 8037   match(PrefetchAllocation mem);
 8038   ins_cost(125);
 8039 
 8040   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8041   ins_encode %{
 8042     __ prefetcht0($mem$$Address);
 8043   %}
 8044   ins_pipe(ialu_mem);
 8045 %}
 8046 
 8047 instruct prefetchAllocT2( memory mem ) %{
 8048   predicate(AllocatePrefetchInstr==2);
 8049   match(PrefetchAllocation mem);
 8050   ins_cost(125);
 8051 
 8052   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8053   ins_encode %{
 8054     __ prefetcht2($mem$$Address);
 8055   %}
 8056   ins_pipe(ialu_mem);
 8057 %}
 8058 
 8059 //----------Store Instructions-------------------------------------------------
 8060 
 8061 // Store Byte
 8062 instruct storeB(memory mem, rRegI src)
 8063 %{
 8064   match(Set mem (StoreB mem src));
 8065 
 8066   ins_cost(125); // XXX
 8067   format %{ "movb    $mem, $src\t# byte" %}
 8068   ins_encode %{
 8069     __ movb($mem$$Address, $src$$Register);
 8070   %}
 8071   ins_pipe(ialu_mem_reg);
 8072 %}
 8073 
 8074 // Store Char/Short
 8075 instruct storeC(memory mem, rRegI src)
 8076 %{
 8077   match(Set mem (StoreC mem src));
 8078 
 8079   ins_cost(125); // XXX
 8080   format %{ "movw    $mem, $src\t# char/short" %}
 8081   ins_encode %{
 8082     __ movw($mem$$Address, $src$$Register);
 8083   %}
 8084   ins_pipe(ialu_mem_reg);
 8085 %}
 8086 
 8087 // Store Integer
 8088 instruct storeI(memory mem, rRegI src)
 8089 %{
 8090   match(Set mem (StoreI mem src));
 8091 
 8092   ins_cost(125); // XXX
 8093   format %{ "movl    $mem, $src\t# int" %}
 8094   ins_encode %{
 8095     __ movl($mem$$Address, $src$$Register);
 8096   %}
 8097   ins_pipe(ialu_mem_reg);
 8098 %}
 8099 
 8100 // Store Long
 8101 instruct storeL(memory mem, rRegL src)
 8102 %{
 8103   match(Set mem (StoreL mem src));
 8104 
 8105   ins_cost(125); // XXX
 8106   format %{ "movq    $mem, $src\t# long" %}
 8107   ins_encode %{
 8108     __ movq($mem$$Address, $src$$Register);
 8109   %}
 8110   ins_pipe(ialu_mem_reg); // XXX
 8111 %}
 8112 
 8113 // Store Pointer
 8114 instruct storeP(memory mem, any_RegP src)
 8115 %{
 8116   predicate(n->as_Store()->barrier_data() == 0);
 8117   match(Set mem (StoreP mem src));
 8118 
 8119   ins_cost(125); // XXX
 8120   format %{ "movq    $mem, $src\t# ptr" %}
 8121   ins_encode %{
 8122     __ movq($mem$$Address, $src$$Register);
 8123   %}
 8124   ins_pipe(ialu_mem_reg);
 8125 %}
 8126 
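      // When compressed oops are in use with a null base, r12 (the heap base
      // register) holds zero, so storing r12 is a compact way to write a zero
      // word without encoding an immediate.
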
 8127 instruct storeImmP0(memory mem, immP0 zero)
 8128 %{
 8129   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8130   match(Set mem (StoreP mem zero));
 8131 
 8132   ins_cost(125); // XXX
 8133   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8134   ins_encode %{
 8135     __ movq($mem$$Address, r12);
 8136   %}
 8137   ins_pipe(ialu_mem_reg);
 8138 %}
 8139 
 8140 // Store Null Pointer, mark word, or other simple pointer constant.
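      // Only a 31-bit (positive) constant can be stored this way: a mov to a
      // 64-bit memory operand takes at most a sign-extended 32-bit immediate.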
 8141 instruct storeImmP(memory mem, immP31 src)
 8142 %{
 8143   predicate(n->as_Store()->barrier_data() == 0);
 8144   match(Set mem (StoreP mem src));
 8145 
 8146   ins_cost(150); // XXX
 8147   format %{ "movq    $mem, $src\t# ptr" %}
 8148   ins_encode %{
 8149     __ movq($mem$$Address, $src$$constant);
 8150   %}
 8151   ins_pipe(ialu_mem_imm);
 8152 %}
 8153 
 8154 // Store Compressed Pointer
 8155 instruct storeN(memory mem, rRegN src)
 8156 %{
 8157   predicate(n->as_Store()->barrier_data() == 0);
 8158   match(Set mem (StoreN mem src));
 8159 
 8160   ins_cost(125); // XXX
 8161   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8162   ins_encode %{
 8163     __ movl($mem$$Address, $src$$Register);
 8164   %}
 8165   ins_pipe(ialu_mem_reg);
 8166 %}
 8167 
 8168 instruct storeNKlass(memory mem, rRegN src)
 8169 %{
 8170   match(Set mem (StoreNKlass mem src));
 8171 
 8172   ins_cost(125); // XXX
 8173   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8174   ins_encode %{
 8175     __ movl($mem$$Address, $src$$Register);
 8176   %}
 8177   ins_pipe(ialu_mem_reg);
 8178 %}
 8179 
 8180 instruct storeImmN0(memory mem, immN0 zero)
 8181 %{
 8182   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8183   match(Set mem (StoreN mem zero));
 8184 
 8185   ins_cost(125); // XXX
 8186   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8187   ins_encode %{
 8188     __ movl($mem$$Address, r12);
 8189   %}
 8190   ins_pipe(ialu_mem_reg);
 8191 %}
 8192 
 8193 instruct storeImmN(memory mem, immN src)
 8194 %{
 8195   predicate(n->as_Store()->barrier_data() == 0);
 8196   match(Set mem (StoreN mem src));
 8197 
 8198   ins_cost(150); // XXX
 8199   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8200   ins_encode %{
 8201     address con = (address)$src$$constant;
 8202     if (con == nullptr) {
 8203       __ movl($mem$$Address, 0);
 8204     } else {
 8205       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8206     }
 8207   %}
 8208   ins_pipe(ialu_mem_imm);
 8209 %}
 8210 
 8211 instruct storeImmNKlass(memory mem, immNKlass src)
 8212 %{
 8213   match(Set mem (StoreNKlass mem src));
 8214 
 8215   ins_cost(150); // XXX
 8216   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8217   ins_encode %{
 8218     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8219   %}
 8220   ins_pipe(ialu_mem_imm);
 8221 %}
 8222 
 8223 // Store Integer Immediate
 8224 instruct storeImmI0(memory mem, immI_0 zero)
 8225 %{
 8226   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8227   match(Set mem (StoreI mem zero));
 8228 
 8229   ins_cost(125); // XXX
 8230   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8231   ins_encode %{
 8232     __ movl($mem$$Address, r12);
 8233   %}
 8234   ins_pipe(ialu_mem_reg);
 8235 %}
 8236 
 8237 instruct storeImmI(memory mem, immI src)
 8238 %{
 8239   match(Set mem (StoreI mem src));
 8240 
 8241   ins_cost(150);
 8242   format %{ "movl    $mem, $src\t# int" %}
 8243   ins_encode %{
 8244     __ movl($mem$$Address, $src$$constant);
 8245   %}
 8246   ins_pipe(ialu_mem_imm);
 8247 %}
 8248 
 8249 // Store Long Immediate
 8250 instruct storeImmL0(memory mem, immL0 zero)
 8251 %{
 8252   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8253   match(Set mem (StoreL mem zero));
 8254 
 8255   ins_cost(125); // XXX
 8256   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8257   ins_encode %{
 8258     __ movq($mem$$Address, r12);
 8259   %}
 8260   ins_pipe(ialu_mem_reg);
 8261 %}
 8262 
 8263 instruct storeImmL(memory mem, immL32 src)
 8264 %{
 8265   match(Set mem (StoreL mem src));
 8266 
 8267   ins_cost(150);
 8268   format %{ "movq    $mem, $src\t# long" %}
 8269   ins_encode %{
 8270     __ movq($mem$$Address, $src$$constant);
 8271   %}
 8272   ins_pipe(ialu_mem_imm);
 8273 %}
 8274 
 8275 // Store Short/Char Immediate
 8276 instruct storeImmC0(memory mem, immI_0 zero)
 8277 %{
 8278   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8279   match(Set mem (StoreC mem zero));
 8280 
 8281   ins_cost(125); // XXX
 8282   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8283   ins_encode %{
 8284     __ movw($mem$$Address, r12);
 8285   %}
 8286   ins_pipe(ialu_mem_reg);
 8287 %}
 8288 
 8289 instruct storeImmI16(memory mem, immI16 src)
 8290 %{
 8291   predicate(UseStoreImmI16);
 8292   match(Set mem (StoreC mem src));
 8293 
 8294   ins_cost(150);
 8295   format %{ "movw    $mem, $src\t# short/char" %}
 8296   ins_encode %{
 8297     __ movw($mem$$Address, $src$$constant);
 8298   %}
 8299   ins_pipe(ialu_mem_imm);
 8300 %}
 8301 
 8302 // Store Byte Immediate
 8303 instruct storeImmB0(memory mem, immI_0 zero)
 8304 %{
 8305   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8306   match(Set mem (StoreB mem zero));
 8307 
 8308   ins_cost(125); // XXX
 8309   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8310   ins_encode %{
 8311     __ movb($mem$$Address, r12);
 8312   %}
 8313   ins_pipe(ialu_mem_reg);
 8314 %}
 8315 
 8316 instruct storeImmB(memory mem, immI8 src)
 8317 %{
 8318   match(Set mem (StoreB mem src));
 8319 
 8320   ins_cost(150); // XXX
 8321   format %{ "movb    $mem, $src\t# byte" %}
 8322   ins_encode %{
 8323     __ movb($mem$$Address, $src$$constant);
 8324   %}
 8325   ins_pipe(ialu_mem_imm);
 8326 %}
 8327 
 8328 // Store Float
 8329 instruct storeF(memory mem, regF src)
 8330 %{
 8331   match(Set mem (StoreF mem src));
 8332 
 8333   ins_cost(95); // XXX
 8334   format %{ "movss   $mem, $src\t# float" %}
 8335   ins_encode %{
 8336     __ movflt($mem$$Address, $src$$XMMRegister);
 8337   %}
 8338   ins_pipe(pipe_slow); // XXX
 8339 %}
 8340 
 8341 // Store immediate Float value (it is faster than store from XMM register)
 8342 instruct storeF0(memory mem, immF0 zero)
 8343 %{
 8344   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8345   match(Set mem (StoreF mem zero));
 8346 
 8347   ins_cost(25); // XXX
 8348   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8349   ins_encode %{
 8350     __ movl($mem$$Address, r12);
 8351   %}
 8352   ins_pipe(ialu_mem_reg);
 8353 %}
 8354 
 8355 instruct storeF_imm(memory mem, immF src)
 8356 %{
 8357   match(Set mem (StoreF mem src));
 8358 
 8359   ins_cost(50);
 8360   format %{ "movl    $mem, $src\t# float" %}
 8361   ins_encode %{
 8362     __ movl($mem$$Address, jint_cast($src$$constant));
 8363   %}
 8364   ins_pipe(ialu_mem_imm);
 8365 %}
 8366 
 8367 // Store Double
 8368 instruct storeD(memory mem, regD src)
 8369 %{
 8370   match(Set mem (StoreD mem src));
 8371 
 8372   ins_cost(95); // XXX
 8373   format %{ "movsd   $mem, $src\t# double" %}
 8374   ins_encode %{
 8375     __ movdbl($mem$$Address, $src$$XMMRegister);
 8376   %}
 8377   ins_pipe(pipe_slow); // XXX
 8378 %}
 8379 
 8380 // Store immediate double 0.0 (it is faster than store from XMM register)
 8381 instruct storeD0_imm(memory mem, immD0 src)
 8382 %{
 8383   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8384   match(Set mem (StoreD mem src));
 8385 
 8386   ins_cost(50);
 8387   format %{ "movq    $mem, $src\t# double 0." %}
 8388   ins_encode %{
 8389     __ movq($mem$$Address, $src$$constant);
 8390   %}
 8391   ins_pipe(ialu_mem_imm);
 8392 %}
 8393 
 8394 instruct storeD0(memory mem, immD0 zero)
 8395 %{
 8396   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8397   match(Set mem (StoreD mem zero));
 8398 
 8399   ins_cost(25); // XXX
 8400   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8401   ins_encode %{
 8402     __ movq($mem$$Address, r12);
 8403   %}
 8404   ins_pipe(ialu_mem_reg);
 8405 %}
 8406 
 8407 instruct storeSSI(stackSlotI dst, rRegI src)
 8408 %{
 8409   match(Set dst src);
 8410 
 8411   ins_cost(100);
 8412   format %{ "movl    $dst, $src\t# int stk" %}
 8413   ins_encode %{
 8414     __ movl($dst$$Address, $src$$Register);
 8415   %}
 8416   ins_pipe( ialu_mem_reg );
 8417 %}
 8418 
 8419 instruct storeSSL(stackSlotL dst, rRegL src)
 8420 %{
 8421   match(Set dst src);
 8422 
 8423   ins_cost(100);
 8424   format %{ "movq    $dst, $src\t# long stk" %}
 8425   ins_encode %{
 8426     __ movq($dst$$Address, $src$$Register);
 8427   %}
 8428   ins_pipe(ialu_mem_reg);
 8429 %}
 8430 
 8431 instruct storeSSP(stackSlotP dst, rRegP src)
 8432 %{
 8433   match(Set dst src);
 8434 
 8435   ins_cost(100);
 8436   format %{ "movq    $dst, $src\t# ptr stk" %}
 8437   ins_encode %{
 8438     __ movq($dst$$Address, $src$$Register);
 8439   %}
 8440   ins_pipe(ialu_mem_reg);
 8441 %}
 8442 
 8443 instruct storeSSF(stackSlotF dst, regF src)
 8444 %{
 8445   match(Set dst src);
 8446 
 8447   ins_cost(95); // XXX
 8448   format %{ "movss   $dst, $src\t# float stk" %}
 8449   ins_encode %{
 8450     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8451   %}
 8452   ins_pipe(pipe_slow); // XXX
 8453 %}
 8454 
 8455 instruct storeSSD(stackSlotD dst, regD src)
 8456 %{
 8457   match(Set dst src);
 8458 
 8459   ins_cost(95); // XXX
 8460   format %{ "movsd   $dst, $src\t# double stk" %}
 8461   ins_encode %{
 8462     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8463   %}
 8464   ins_pipe(pipe_slow); // XXX
 8465 %}
 8466 
 8467 instruct cacheWB(indirect addr)
 8468 %{
 8469   predicate(VM_Version::supports_data_cache_line_flush());
 8470   match(CacheWB addr);
 8471 
 8472   ins_cost(100);
 8473   format %{"cache wb $addr" %}
 8474   ins_encode %{
 8475     assert($addr->index_position() < 0, "should be");
 8476     assert($addr$$disp == 0, "should be");
 8477     __ cache_wb(Address($addr$$base$$Register, 0));
 8478   %}
 8479   ins_pipe(pipe_slow); // XXX
 8480 %}
 8481 
 8482 instruct cacheWBPreSync()
 8483 %{
 8484   predicate(VM_Version::supports_data_cache_line_flush());
 8485   match(CacheWBPreSync);
 8486 
 8487   ins_cost(100);
 8488   format %{"cache wb presync" %}
 8489   ins_encode %{
 8490     __ cache_wbsync(true);
 8491   %}
 8492   ins_pipe(pipe_slow); // XXX
 8493 %}
 8494 
 8495 instruct cacheWBPostSync()
 8496 %{
 8497   predicate(VM_Version::supports_data_cache_line_flush());
 8498   match(CacheWBPostSync);
 8499 
 8500   ins_cost(100);
 8501   format %{"cache wb postsync" %}
 8502   ins_encode %{
 8503     __ cache_wbsync(false);
 8504   %}
 8505   ins_pipe(pipe_slow); // XXX
 8506 %}
 8507 
 8508 //----------BSWAP Instructions-------------------------------------------------
 8509 instruct bytes_reverse_int(rRegI dst) %{
 8510   match(Set dst (ReverseBytesI dst));
 8511 
 8512   format %{ "bswapl  $dst" %}
 8513   ins_encode %{
 8514     __ bswapl($dst$$Register);
 8515   %}
 8516   ins_pipe( ialu_reg );
 8517 %}
 8518 
 8519 instruct bytes_reverse_long(rRegL dst) %{
 8520   match(Set dst (ReverseBytesL dst));
 8521 
 8522   format %{ "bswapq  $dst" %}
 8523   ins_encode %{
 8524     __ bswapq($dst$$Register);
 8525   %}
 8526   ins_pipe( ialu_reg);
 8527 %}
 8528 
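      // 16-bit byte reversal: bswap the whole 32-bit register, then shift the
      // two interesting bytes back down.  The logical shift zero-fills for the
      // unsigned form; the arithmetic shift sign-extends for the signed form.
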
 8529 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8530   match(Set dst (ReverseBytesUS dst));
 8531   effect(KILL cr);
 8532 
 8533   format %{ "bswapl  $dst\n\t"
 8534             "shrl    $dst,16\n\t" %}
 8535   ins_encode %{
 8536     __ bswapl($dst$$Register);
 8537     __ shrl($dst$$Register, 16);
 8538   %}
 8539   ins_pipe( ialu_reg );
 8540 %}
 8541 
 8542 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8543   match(Set dst (ReverseBytesS dst));
 8544   effect(KILL cr);
 8545 
 8546   format %{ "bswapl  $dst\n\t"
 8547             "sar     $dst,16\n\t" %}
 8548   ins_encode %{
 8549     __ bswapl($dst$$Register);
 8550     __ sarl($dst$$Register, 16);
 8551   %}
 8552   ins_pipe( ialu_reg );
 8553 %}
 8554 
 8555 //---------- Zeros Count Instructions ------------------------------------------
 8556 
 8557 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8558   predicate(UseCountLeadingZerosInstruction);
 8559   match(Set dst (CountLeadingZerosI src));
 8560   effect(KILL cr);
 8561 
 8562   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8563   ins_encode %{
 8564     __ lzcntl($dst$$Register, $src$$Register);
 8565   %}
 8566   ins_pipe(ialu_reg);
 8567 %}
 8568 
 8569 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8570   predicate(UseCountLeadingZerosInstruction);
 8571   match(Set dst (CountLeadingZerosI (LoadI src)));
 8572   effect(KILL cr);
 8573   ins_cost(175);
 8574   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8575   ins_encode %{
 8576     __ lzcntl($dst$$Register, $src$$Address);
 8577   %}
 8578   ins_pipe(ialu_reg_mem);
 8579 %}
 8580 
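      // Fallback when lzcnt is not available: clz = 31 - bsr(src) (63 - bsr for
      // the long form).  bsr sets ZF and leaves the destination undefined for a
      // zero input, so the -1 fallback makes the final result 32 (or 64).
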
 8581 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8582   predicate(!UseCountLeadingZerosInstruction);
 8583   match(Set dst (CountLeadingZerosI src));
 8584   effect(KILL cr);
 8585 
 8586   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8587             "jnz     skip\n\t"
 8588             "movl    $dst, -1\n"
 8589       "skip:\n\t"
 8590             "negl    $dst\n\t"
 8591             "addl    $dst, 31" %}
 8592   ins_encode %{
 8593     Register Rdst = $dst$$Register;
 8594     Register Rsrc = $src$$Register;
 8595     Label skip;
 8596     __ bsrl(Rdst, Rsrc);
 8597     __ jccb(Assembler::notZero, skip);
 8598     __ movl(Rdst, -1);
 8599     __ bind(skip);
 8600     __ negl(Rdst);
 8601     __ addl(Rdst, BitsPerInt - 1);
 8602   %}
 8603   ins_pipe(ialu_reg);
 8604 %}
 8605 
 8606 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8607   predicate(UseCountLeadingZerosInstruction);
 8608   match(Set dst (CountLeadingZerosL src));
 8609   effect(KILL cr);
 8610 
 8611   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8612   ins_encode %{
 8613     __ lzcntq($dst$$Register, $src$$Register);
 8614   %}
 8615   ins_pipe(ialu_reg);
 8616 %}
 8617 
 8618 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8619   predicate(UseCountLeadingZerosInstruction);
 8620   match(Set dst (CountLeadingZerosL (LoadL src)));
 8621   effect(KILL cr);
 8622   ins_cost(175);
 8623   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8624   ins_encode %{
 8625     __ lzcntq($dst$$Register, $src$$Address);
 8626   %}
 8627   ins_pipe(ialu_reg_mem);
 8628 %}
 8629 
 8630 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8631   predicate(!UseCountLeadingZerosInstruction);
 8632   match(Set dst (CountLeadingZerosL src));
 8633   effect(KILL cr);
 8634 
 8635   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8636             "jnz     skip\n\t"
 8637             "movl    $dst, -1\n"
 8638       "skip:\n\t"
 8639             "negl    $dst\n\t"
 8640             "addl    $dst, 63" %}
 8641   ins_encode %{
 8642     Register Rdst = $dst$$Register;
 8643     Register Rsrc = $src$$Register;
 8644     Label skip;
 8645     __ bsrq(Rdst, Rsrc);
 8646     __ jccb(Assembler::notZero, skip);
 8647     __ movl(Rdst, -1);
 8648     __ bind(skip);
 8649     __ negl(Rdst);
 8650     __ addl(Rdst, BitsPerLong - 1);
 8651   %}
 8652   ins_pipe(ialu_reg);
 8653 %}
 8654 
 8655 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8656   predicate(UseCountTrailingZerosInstruction);
 8657   match(Set dst (CountTrailingZerosI src));
 8658   effect(KILL cr);
 8659 
 8660   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8661   ins_encode %{
 8662     __ tzcntl($dst$$Register, $src$$Register);
 8663   %}
 8664   ins_pipe(ialu_reg);
 8665 %}
 8666 
 8667 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8668   predicate(UseCountTrailingZerosInstruction);
 8669   match(Set dst (CountTrailingZerosI (LoadI src)));
 8670   effect(KILL cr);
 8671   ins_cost(175);
 8672   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8673   ins_encode %{
 8674     __ tzcntl($dst$$Register, $src$$Address);
 8675   %}
 8676   ins_pipe(ialu_reg_mem);
 8677 %}
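      // Fallback when tzcnt is not available: bsf yields the index of the
      // lowest set bit, which is exactly the trailing-zero count; a zero input
      // falls through and loads 32 (64 for the long form).
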
 8678 
 8679 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8680   predicate(!UseCountTrailingZerosInstruction);
 8681   match(Set dst (CountTrailingZerosI src));
 8682   effect(KILL cr);
 8683 
 8684   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8685             "jnz     done\n\t"
 8686             "movl    $dst, 32\n"
 8687       "done:" %}
 8688   ins_encode %{
 8689     Register Rdst = $dst$$Register;
 8690     Label done;
 8691     __ bsfl(Rdst, $src$$Register);
 8692     __ jccb(Assembler::notZero, done);
 8693     __ movl(Rdst, BitsPerInt);
 8694     __ bind(done);
 8695   %}
 8696   ins_pipe(ialu_reg);
 8697 %}
 8698 
 8699 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8700   predicate(UseCountTrailingZerosInstruction);
 8701   match(Set dst (CountTrailingZerosL src));
 8702   effect(KILL cr);
 8703 
 8704   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8705   ins_encode %{
 8706     __ tzcntq($dst$$Register, $src$$Register);
 8707   %}
 8708   ins_pipe(ialu_reg);
 8709 %}
 8710 
 8711 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8712   predicate(UseCountTrailingZerosInstruction);
 8713   match(Set dst (CountTrailingZerosL (LoadL src)));
 8714   effect(KILL cr);
 8715   ins_cost(175);
 8716   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8717   ins_encode %{
 8718     __ tzcntq($dst$$Register, $src$$Address);
 8719   %}
 8720   ins_pipe(ialu_reg_mem);
 8721 %}
 8722 
 8723 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8724   predicate(!UseCountTrailingZerosInstruction);
 8725   match(Set dst (CountTrailingZerosL src));
 8726   effect(KILL cr);
 8727 
 8728   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8729             "jnz     done\n\t"
 8730             "movl    $dst, 64\n"
 8731       "done:" %}
 8732   ins_encode %{
 8733     Register Rdst = $dst$$Register;
 8734     Label done;
 8735     __ bsfq(Rdst, $src$$Register);
 8736     __ jccb(Assembler::notZero, done);
 8737     __ movl(Rdst, BitsPerLong);
 8738     __ bind(done);
 8739   %}
 8740   ins_pipe(ialu_reg);
 8741 %}
 8742 
 8743 //--------------- Reverse Operation Instructions ----------------
 8744 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8745   predicate(!VM_Version::supports_gfni());
 8746   match(Set dst (ReverseI src));
 8747   effect(TEMP dst, TEMP rtmp, KILL cr);
 8748   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8749   ins_encode %{
 8750     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8751   %}
 8752   ins_pipe( ialu_reg );
 8753 %}
 8754 
 8755 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8756   predicate(VM_Version::supports_gfni());
 8757   match(Set dst (ReverseI src));
 8758   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8759   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8760   ins_encode %{
 8761     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8762   %}
 8763   ins_pipe( ialu_reg );
 8764 %}
 8765 
 8766 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8767   predicate(!VM_Version::supports_gfni());
 8768   match(Set dst (ReverseL src));
 8769   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8770   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8771   ins_encode %{
 8772     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8773   %}
 8774   ins_pipe( ialu_reg );
 8775 %}
 8776 
 8777 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8778   predicate(VM_Version::supports_gfni());
 8779   match(Set dst (ReverseL src));
 8780   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8781   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8782   ins_encode %{
 8783     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8784   %}
 8785   ins_pipe( ialu_reg );
 8786 %}
 8787 
 8788 //---------- Population Count Instructions -------------------------------------
 8789 
 8790 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8791   predicate(UsePopCountInstruction);
 8792   match(Set dst (PopCountI src));
 8793   effect(KILL cr);
 8794 
 8795   format %{ "popcnt  $dst, $src" %}
 8796   ins_encode %{
 8797     __ popcntl($dst$$Register, $src$$Register);
 8798   %}
 8799   ins_pipe(ialu_reg);
 8800 %}
 8801 
 8802 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8803   predicate(UsePopCountInstruction);
 8804   match(Set dst (PopCountI (LoadI mem)));
 8805   effect(KILL cr);
 8806 
 8807   format %{ "popcnt  $dst, $mem" %}
 8808   ins_encode %{
 8809     __ popcntl($dst$$Register, $mem$$Address);
 8810   %}
 8811   ins_pipe(ialu_reg);
 8812 %}
 8813 
 8814 // Note: Long.bitCount(long) returns an int.
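      // Hence the 32-bit destination register (rRegI) even though the source is a long.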
 8815 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8816   predicate(UsePopCountInstruction);
 8817   match(Set dst (PopCountL src));
 8818   effect(KILL cr);
 8819 
 8820   format %{ "popcnt  $dst, $src" %}
 8821   ins_encode %{
 8822     __ popcntq($dst$$Register, $src$$Register);
 8823   %}
 8824   ins_pipe(ialu_reg);
 8825 %}
 8826 
 8827 // Note: Long.bitCount(long) returns an int.
 8828 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8829   predicate(UsePopCountInstruction);
 8830   match(Set dst (PopCountL (LoadL mem)));
 8831   effect(KILL cr);
 8832 
 8833   format %{ "popcnt  $dst, $mem" %}
 8834   ins_encode %{
 8835     __ popcntq($dst$$Register, $mem$$Address);
 8836   %}
 8837   ins_pipe(ialu_reg);
 8838 %}
 8839 
 8840 
 8841 //----------MemBar Instructions-----------------------------------------------
 8842 // Memory barrier flavors
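      //
      // x86 has a strong (TSO) memory model: acquire, release and store-store
      // barriers need no code and are matched by empty encodings below; only
      // the StoreLoad case (membar_volatile) emits a real fence, implemented
      // as "lock addl [rsp], 0", and even that can be elided when the matcher
      // proves it redundant (see unnecessary_membar_volatile).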
 8843 
 8844 instruct membar_acquire()
 8845 %{
 8846   match(MemBarAcquire);
 8847   match(LoadFence);
 8848   ins_cost(0);
 8849 
 8850   size(0);
 8851   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8852   ins_encode();
 8853   ins_pipe(empty);
 8854 %}
 8855 
 8856 instruct membar_acquire_lock()
 8857 %{
 8858   match(MemBarAcquireLock);
 8859   ins_cost(0);
 8860 
 8861   size(0);
 8862   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8863   ins_encode();
 8864   ins_pipe(empty);
 8865 %}
 8866 
 8867 instruct membar_release()
 8868 %{
 8869   match(MemBarRelease);
 8870   match(StoreFence);
 8871   ins_cost(0);
 8872 
 8873   size(0);
 8874   format %{ "MEMBAR-release ! (empty encoding)" %}
 8875   ins_encode();
 8876   ins_pipe(empty);
 8877 %}
 8878 
 8879 instruct membar_release_lock()
 8880 %{
 8881   match(MemBarReleaseLock);
 8882   ins_cost(0);
 8883 
 8884   size(0);
 8885   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8886   ins_encode();
 8887   ins_pipe(empty);
 8888 %}
 8889 
 8890 instruct membar_volatile(rFlagsReg cr) %{
 8891   match(MemBarVolatile);
 8892   effect(KILL cr);
 8893   ins_cost(400);
 8894 
 8895   format %{
 8896     $$template
 8897     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8898   %}
 8899   ins_encode %{
 8900     __ membar(Assembler::StoreLoad);
 8901   %}
 8902   ins_pipe(pipe_slow);
 8903 %}
 8904 
 8905 instruct unnecessary_membar_volatile()
 8906 %{
 8907   match(MemBarVolatile);
 8908   predicate(Matcher::post_store_load_barrier(n));
 8909   ins_cost(0);
 8910 
 8911   size(0);
 8912   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8913   ins_encode();
 8914   ins_pipe(empty);
 8915 %}
 8916 
 8917 instruct membar_storestore() %{
 8918   match(MemBarStoreStore);
 8919   match(StoreStoreFence);
 8920   ins_cost(0);
 8921 
 8922   size(0);
 8923   format %{ "MEMBAR-storestore (empty encoding)" %}
 8924   ins_encode( );
 8925   ins_pipe(empty);
 8926 %}
 8927 
 8928 //----------Move Instructions--------------------------------------------------
 8929 
 8930 instruct castX2P(rRegP dst, rRegL src)
 8931 %{
 8932   match(Set dst (CastX2P src));
 8933 
 8934   format %{ "movq    $dst, $src\t# long->ptr" %}
 8935   ins_encode %{
 8936     if ($dst$$reg != $src$$reg) {
 8937       __ movptr($dst$$Register, $src$$Register);
 8938     }
 8939   %}
 8940   ins_pipe(ialu_reg_reg); // XXX
 8941 %}
 8942 
 8943 instruct castI2N(rRegN dst, rRegI src)
 8944 %{
 8945   match(Set dst (CastI2N src));
 8946 
 8947   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 8948   ins_encode %{
 8949     if ($dst$$reg != $src$$reg) {
 8950       __ movl($dst$$Register, $src$$Register);
 8951     }
 8952   %}
 8953   ins_pipe(ialu_reg_reg); // XXX
 8954 %}
 8955 
 8956 instruct castN2X(rRegL dst, rRegN src)
 8957 %{
 8958   match(Set dst (CastP2X src));
 8959 
 8960   format %{ "movq    $dst, $src\t# narrow ptr -> long" %}
 8961   ins_encode %{
 8962     if ($dst$$reg != $src$$reg) {
 8963       __ movptr($dst$$Register, $src$$Register);
 8964     }
 8965   %}
 8966   ins_pipe(ialu_reg_reg); // XXX
 8967 %}
 8968 
 8969 instruct castP2X(rRegL dst, rRegP src)
 8970 %{
 8971   match(Set dst (CastP2X src));
 8972 
 8973   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8974   ins_encode %{
 8975     if ($dst$$reg != $src$$reg) {
 8976       __ movptr($dst$$Register, $src$$Register);
 8977     }
 8978   %}
 8979   ins_pipe(ialu_reg_reg); // XXX
 8980 %}
 8981 
 8982 // Convert oop into int for vector alignment masking
 8983 instruct convP2I(rRegI dst, rRegP src)
 8984 %{
 8985   match(Set dst (ConvL2I (CastP2X src)));
 8986 
 8987   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8988   ins_encode %{
 8989     __ movl($dst$$Register, $src$$Register);
 8990   %}
 8991   ins_pipe(ialu_reg_reg); // XXX
 8992 %}
 8993 
 8994 // Convert compressed oop into int for vector alignment masking
 8995 // in the case of 32-bit oops (heap < 4GB).
 8996 instruct convN2I(rRegI dst, rRegN src)
 8997 %{
 8998   predicate(CompressedOops::shift() == 0);
 8999   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 9000 
 9001   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 9002   ins_encode %{
 9003     __ movl($dst$$Register, $src$$Register);
 9004   %}
 9005   ins_pipe(ialu_reg_reg); // XXX
 9006 %}
 9007 
 9008 // Convert oop pointer into compressed form
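// encode_heap_oop subtracts the narrow-oop base (when it is non-zero) and
// shifts right by the narrow-oop shift; decode_heap_oop reverses this.  The
// *_not_null variants are selected by the type predicates and can skip the
// null handling of the general forms.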
 9009 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 9010   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 9011   match(Set dst (EncodeP src));
 9012   effect(KILL cr);
 9013   format %{ "encode_heap_oop $dst,$src" %}
 9014   ins_encode %{
 9015     Register s = $src$$Register;
 9016     Register d = $dst$$Register;
 9017     if (s != d) {
 9018       __ movq(d, s);
 9019     }
 9020     __ encode_heap_oop(d);
 9021   %}
 9022   ins_pipe(ialu_reg_long);
 9023 %}
 9024 
 9025 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9026   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 9027   match(Set dst (EncodeP src));
 9028   effect(KILL cr);
 9029   format %{ "encode_heap_oop_not_null $dst,$src" %}
 9030   ins_encode %{
 9031     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 9032   %}
 9033   ins_pipe(ialu_reg_long);
 9034 %}
 9035 
 9036 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 9037   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9038             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9039   match(Set dst (DecodeN src));
 9040   effect(KILL cr);
 9041   format %{ "decode_heap_oop $dst,$src" %}
 9042   ins_encode %{
 9043     Register s = $src$$Register;
 9044     Register d = $dst$$Register;
 9045     if (s != d) {
 9046       __ movq(d, s);
 9047     }
 9048     __ decode_heap_oop(d);
 9049   %}
 9050   ins_pipe(ialu_reg_long);
 9051 %}
 9052 
 9053 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9054   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9055             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9056   match(Set dst (DecodeN src));
 9057   effect(KILL cr);
 9058   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9059   ins_encode %{
 9060     Register s = $src$$Register;
 9061     Register d = $dst$$Register;
 9062     if (s != d) {
 9063       __ decode_heap_oop_not_null(d, s);
 9064     } else {
 9065       __ decode_heap_oop_not_null(d);
 9066     }
 9067   %}
 9068   ins_pipe(ialu_reg_long);
 9069 %}
 9070 
 9071 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9072   match(Set dst (EncodePKlass src));
 9073   effect(TEMP dst, KILL cr);
 9074   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9075   ins_encode %{
 9076     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9077   %}
 9078   ins_pipe(ialu_reg_long);
 9079 %}
 9080 
 9081 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9082   match(Set dst (DecodeNKlass src));
 9083   effect(TEMP dst, KILL cr);
 9084   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9085   ins_encode %{
 9086     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9087   %}
 9088   ins_pipe(ialu_reg_long);
 9089 %}
 9090 
 9091 //----------Conditional Move---------------------------------------------------
 9092 // Jump
 9093 // dummy instruction for generating temp registers
 9094 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9095   match(Jump (LShiftL switch_val shift));
 9096   ins_cost(350);
 9097   predicate(false);
 9098   effect(TEMP dest);
 9099 
 9100   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9101             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9102   ins_encode %{
 9103     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9104     // to do that and the compiler is using that register as one it can allocate.
 9105     // So we build it all by hand.
 9106     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9107     // ArrayAddress dispatch(table, index);
 9108     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9109     __ lea($dest$$Register, $constantaddress);
 9110     __ jmp(dispatch);
 9111   %}
 9112   ins_pipe(pipe_jmp);
 9113 %}
 9114 
 9115 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9116   match(Jump (AddL (LShiftL switch_val shift) offset));
 9117   ins_cost(350);
 9118   effect(TEMP dest);
 9119 
 9120   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9121             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9122   ins_encode %{
 9123     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9124     // to do that and the compiler is using that register as one it can allocate.
 9125     // So we build it all by hand.
 9126     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9127     // ArrayAddress dispatch(table, index);
 9128     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9129     __ lea($dest$$Register, $constantaddress);
 9130     __ jmp(dispatch);
 9131   %}
 9132   ins_pipe(pipe_jmp);
 9133 %}
 9134 
 9135 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9136   match(Jump switch_val);
 9137   ins_cost(350);
 9138   effect(TEMP dest);
 9139 
 9140   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9141             "jmp     [$dest + $switch_val]\n\t" %}
 9142   ins_encode %{
 9143     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9144     // to do that and the compiler is using that register as one it can allocate.
 9145     // So we build it all by hand.
 9146     // Address index(noreg, switch_reg, Address::times_1);
 9147     // ArrayAddress dispatch(table, index);
 9148     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9149     __ lea($dest$$Register, $constantaddress);
 9150     __ jmp(dispatch);
 9151   %}
 9152   ins_pipe(pipe_jmp);
 9153 %}
 9154 
 9155 // Conditional move
 9156 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9157 %{
 9158   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9159   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9160 
 9161   ins_cost(100); // XXX
 9162   format %{ "setbn$cop $dst\t# signed, int" %}
 9163   ins_encode %{
 9164     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9165     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9166   %}
 9167   ins_pipe(ialu_reg);
 9168 %}
 9169 
 9170 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9171 %{
 9172   predicate(!UseAPX);
 9173   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9174 
 9175   ins_cost(200); // XXX
 9176   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9177   ins_encode %{
 9178     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9179   %}
 9180   ins_pipe(pipe_cmov_reg);
 9181 %}
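// Instructs suffixed _ndd use the Intel APX "new data destination" (NDD)
// three-operand encodings and are selected only when UseAPX is set; the
// legacy two-operand forms read-modify-write $dst and are used otherwise.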
 9182 
 9183 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9184 %{
 9185   predicate(UseAPX);
 9186   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9187 
 9188   ins_cost(200);
 9189   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9190   ins_encode %{
 9191     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9192   %}
 9193   ins_pipe(pipe_cmov_reg);
 9194 %}
 9195 
 9196 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9197 %{
 9198   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9199   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9200 
 9201   ins_cost(100); // XXX
 9202   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9203   ins_encode %{
 9204     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9205     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9206   %}
 9207   ins_pipe(ialu_reg);
 9208 %}
 9209 
 9210 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9211   predicate(!UseAPX);
 9212   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9213 
 9214   ins_cost(200); // XXX
 9215   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9216   ins_encode %{
 9217     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9218   %}
 9219   ins_pipe(pipe_cmov_reg);
 9220 %}
 9221 
 9222 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9223   predicate(UseAPX);
 9224   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9225 
 9226   ins_cost(200);
 9227   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9228   ins_encode %{
 9229     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9230   %}
 9231   ins_pipe(pipe_cmov_reg);
 9232 %}
 9233 
 9234 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9235 %{
 9236   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9237   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9238 
 9239   ins_cost(100); // XXX
 9240   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9241   ins_encode %{
 9242     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9243     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9244   %}
 9245   ins_pipe(ialu_reg);
 9246 %}
 9247 
 9248 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9249 %{
 9250   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9251   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9252 
 9253   ins_cost(100); // XXX
 9254   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9255   ins_encode %{
 9256     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9257     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9258   %}
 9259   ins_pipe(ialu_reg);
 9260 %}
 9261 
 9262 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9263   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9264 
 9265   ins_cost(200);
 9266   expand %{
 9267     cmovI_regU(cop, cr, dst, src);
 9268   %}
 9269 %}
 9270 
 9271 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9272   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9273 
 9274   ins_cost(200);
 9275   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9276   ins_encode %{
 9277     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9278   %}
 9279   ins_pipe(pipe_cmov_reg);
 9280 %}
 9281 
 9282 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9283   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9284   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9285 
 9286   ins_cost(200); // XXX
 9287   format %{ "cmovpl  $dst, $src\n\t"
 9288             "cmovnel $dst, $src" %}
 9289   ins_encode %{
 9290     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9291     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9292   %}
 9293   ins_pipe(pipe_cmov_reg);
 9294 %}
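// cmpOpUCF2 comparisons come from ucomiss/ucomisd, where an unordered (NaN)
// result sets the parity flag.  For "!=", NaN operands must select $src, so
// the move has to fire on parity as well as on not-equal, hence the
// cmovp/cmovne pair.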
 9295 
 9296 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9297 // inputs of the CMove
 9298 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9299   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9300   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9301   effect(TEMP dst);
 9302 
 9303   ins_cost(200); // XXX
 9304   format %{ "cmovpl  $dst, $src\n\t"
 9305             "cmovnel $dst, $src" %}
 9306   ins_encode %{
 9307     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9308     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9309   %}
 9310   ins_pipe(pipe_cmov_reg);
 9311 %}
 9312 
 9313 // Conditional move
 9314 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9315   predicate(!UseAPX);
 9316   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9317 
 9318   ins_cost(250); // XXX
 9319   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9320   ins_encode %{
 9321     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9322   %}
 9323   ins_pipe(pipe_cmov_mem);
 9324 %}
 9325 
 9326 // Conditional move
 9327 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9328 %{
 9329   predicate(UseAPX);
 9330   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9331 
 9332   ins_cost(250);
 9333   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9334   ins_encode %{
 9335     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9336   %}
 9337   ins_pipe(pipe_cmov_mem);
 9338 %}
 9339 
 9340 // Conditional move
 9341 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9342 %{
 9343   predicate(!UseAPX);
 9344   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9345 
 9346   ins_cost(250); // XXX
 9347   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9348   ins_encode %{
 9349     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9350   %}
 9351   ins_pipe(pipe_cmov_mem);
 9352 %}
 9353 
 9354 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9355   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9356 
 9357   ins_cost(250);
 9358   expand %{
 9359     cmovI_memU(cop, cr, dst, src);
 9360   %}
 9361 %}
 9362 
 9363 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9364 %{
 9365   predicate(UseAPX);
 9366   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9367 
 9368   ins_cost(250);
 9369   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9370   ins_encode %{
 9371     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9372   %}
 9373   ins_pipe(pipe_cmov_mem);
 9374 %}
 9375 
 9376 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9377 %{
 9378   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9379 
 9380   ins_cost(250);
 9381   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9382   ins_encode %{
 9383     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9384   %}
 9385   ins_pipe(pipe_cmov_mem);
 9386 %}
 9387 
 9388 // Conditional move
 9389 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9390 %{
 9391   predicate(!UseAPX);
 9392   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9393 
 9394   ins_cost(200); // XXX
 9395   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9396   ins_encode %{
 9397     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9398   %}
 9399   ins_pipe(pipe_cmov_reg);
 9400 %}
 9401 
 9402 // Conditional move ndd
 9403 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9404 %{
 9405   predicate(UseAPX);
 9406   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9407 
 9408   ins_cost(200);
 9409   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9410   ins_encode %{
 9411     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9412   %}
 9413   ins_pipe(pipe_cmov_reg);
 9414 %}
 9415 
 9416 // Conditional move
 9417 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9418 %{
 9419   predicate(!UseAPX);
 9420   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9421 
 9422   ins_cost(200); // XXX
 9423   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9424   ins_encode %{
 9425     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9426   %}
 9427   ins_pipe(pipe_cmov_reg);
 9428 %}
 9429 
 9430 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9431   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9432 
 9433   ins_cost(200);
 9434   expand %{
 9435     cmovN_regU(cop, cr, dst, src);
 9436   %}
 9437 %}
 9438 
 9439 // Conditional move ndd
 9440 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9441 %{
 9442   predicate(UseAPX);
 9443   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9444 
 9445   ins_cost(200);
 9446   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9447   ins_encode %{
 9448     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9449   %}
 9450   ins_pipe(pipe_cmov_reg);
 9451 %}
 9452 
 9453 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9454   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9455 
 9456   ins_cost(200);
 9457   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9458   ins_encode %{
 9459     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9460   %}
 9461   ins_pipe(pipe_cmov_reg);
 9462 %}
 9463 
 9464 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9465   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9466   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9467 
 9468   ins_cost(200); // XXX
 9469   format %{ "cmovpl  $dst, $src\n\t"
 9470             "cmovnel $dst, $src" %}
 9471   ins_encode %{
 9472     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9473     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9474   %}
 9475   ins_pipe(pipe_cmov_reg);
 9476 %}
 9477 
 9478 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9479 // inputs of the CMove
 9480 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9481   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9482   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9483 
 9484   ins_cost(200); // XXX
 9485   format %{ "cmovpl  $dst, $src\n\t"
 9486             "cmovnel $dst, $src" %}
 9487   ins_encode %{
 9488     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9489     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9490   %}
 9491   ins_pipe(pipe_cmov_reg);
 9492 %}
 9493 
 9494 // Conditional move
 9495 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9496 %{
 9497   predicate(!UseAPX);
 9498   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9499 
 9500   ins_cost(200); // XXX
 9501   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9502   ins_encode %{
 9503     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9504   %}
 9505   ins_pipe(pipe_cmov_reg);  // XXX
 9506 %}
 9507 
 9508 // Conditional move ndd
 9509 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9510 %{
 9511   predicate(UseAPX);
 9512   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9513 
 9514   ins_cost(200);
 9515   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9516   ins_encode %{
 9517     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9518   %}
 9519   ins_pipe(pipe_cmov_reg);
 9520 %}
 9521 
 9522 // Conditional move
 9523 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9524 %{
 9525   predicate(!UseAPX);
 9526   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9527 
 9528   ins_cost(200); // XXX
 9529   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9530   ins_encode %{
 9531     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9532   %}
 9533   ins_pipe(pipe_cmov_reg); // XXX
 9534 %}
 9535 
 9536 // Conditional move ndd
 9537 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9538 %{
 9539   predicate(UseAPX);
 9540   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9541 
 9542   ins_cost(200);
 9543   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9544   ins_encode %{
 9545     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9546   %}
 9547   ins_pipe(pipe_cmov_reg);
 9548 %}
 9549 
 9550 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9551   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9552 
 9553   ins_cost(200);
 9554   expand %{
 9555     cmovP_regU(cop, cr, dst, src);
 9556   %}
 9557 %}
 9558 
 9559 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9560   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9561 
 9562   ins_cost(200);
 9563   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9564   ins_encode %{
 9565     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9566   %}
 9567   ins_pipe(pipe_cmov_reg);
 9568 %}
 9569 
 9570 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9571   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9572   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9573 
 9574   ins_cost(200); // XXX
 9575   format %{ "cmovpq  $dst, $src\n\t"
 9576             "cmovneq $dst, $src" %}
 9577   ins_encode %{
 9578     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9579     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9580   %}
 9581   ins_pipe(pipe_cmov_reg);
 9582 %}
 9583 
 9584 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9585 // inputs of the CMove
 9586 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9587   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9588   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9589 
 9590   ins_cost(200); // XXX
 9591   format %{ "cmovpq  $dst, $src\n\t"
 9592             "cmovneq $dst, $src" %}
 9593   ins_encode %{
 9594     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9595     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9596   %}
 9597   ins_pipe(pipe_cmov_reg);
 9598 %}
 9599 
 9600 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9601 %{
 9602   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9603   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9604 
 9605   ins_cost(100); // XXX
 9606   format %{ "setbn$cop $dst\t# signed, long" %}
 9607   ins_encode %{
 9608     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9609     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9610   %}
 9611   ins_pipe(ialu_reg);
 9612 %}
 9613 
 9614 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9615 %{
 9616   predicate(!UseAPX);
 9617   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9618 
 9619   ins_cost(200); // XXX
 9620   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9621   ins_encode %{
 9622     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9623   %}
 9624   ins_pipe(pipe_cmov_reg);  // XXX
 9625 %}
 9626 
 9627 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9628 %{
 9629   predicate(UseAPX);
 9630   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9631 
 9632   ins_cost(200);
 9633   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9634   ins_encode %{
 9635     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9636   %}
 9637   ins_pipe(pipe_cmov_reg);
 9638 %}
 9639 
 9640 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9641 %{
 9642   predicate(!UseAPX);
 9643   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9644 
 9645   ins_cost(200); // XXX
 9646   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9647   ins_encode %{
 9648     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9649   %}
 9650   ins_pipe(pipe_cmov_mem);  // XXX
 9651 %}
 9652 
 9653 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9654 %{
 9655   predicate(UseAPX);
 9656   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9657 
 9658   ins_cost(200);
 9659   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9660   ins_encode %{
 9661     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9662   %}
 9663   ins_pipe(pipe_cmov_mem);
 9664 %}
 9665 
 9666 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9667 %{
 9668   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9669   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9670 
 9671   ins_cost(100); // XXX
 9672   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9673   ins_encode %{
 9674     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9675     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9676   %}
 9677   ins_pipe(ialu_reg);
 9678 %}
 9679 
 9680 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9681 %{
 9682   predicate(!UseAPX);
 9683   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9684 
 9685   ins_cost(200); // XXX
 9686   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9687   ins_encode %{
 9688     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9689   %}
 9690   ins_pipe(pipe_cmov_reg); // XXX
 9691 %}
 9692 
 9693 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9694 %{
 9695   predicate(UseAPX);
 9696   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9697 
 9698   ins_cost(200);
 9699   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9700   ins_encode %{
 9701     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9702   %}
 9703   ins_pipe(pipe_cmov_reg);
 9704 %}
 9705 
 9706 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9707 %{
 9708   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9709   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9710 
 9711   ins_cost(100); // XXX
 9712   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9713   ins_encode %{
 9714     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9715     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9716   %}
 9717   ins_pipe(ialu_reg);
 9718 %}
 9719 
 9720 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9721 %{
 9722   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9723   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9724 
 9725   ins_cost(100); // XXX
 9726   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9727   ins_encode %{
 9728     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9729     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9730   %}
 9731   ins_pipe(ialu_reg);
 9732 %}
 9733 
 9734 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9735   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9736 
 9737   ins_cost(200);
 9738   expand %{
 9739     cmovL_regU(cop, cr, dst, src);
 9740   %}
 9741 %}
 9742 
 9743 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9744 %{
 9745   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9746 
 9747   ins_cost(200);
 9748   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9749   ins_encode %{
 9750     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9751   %}
 9752   ins_pipe(pipe_cmov_reg);
 9753 %}
 9754 
 9755 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9756   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9757   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9758 
 9759   ins_cost(200); // XXX
 9760   format %{ "cmovpq  $dst, $src\n\t"
 9761             "cmovneq $dst, $src" %}
 9762   ins_encode %{
 9763     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9764     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9765   %}
 9766   ins_pipe(pipe_cmov_reg);
 9767 %}
 9768 
 9769 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9770 // inputs of the CMove
 9771 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9772   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9773   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9774 
 9775   ins_cost(200); // XXX
 9776   format %{ "cmovpq  $dst, $src\n\t"
 9777             "cmovneq $dst, $src" %}
 9778   ins_encode %{
 9779     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9780     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9781   %}
 9782   ins_pipe(pipe_cmov_reg);
 9783 %}
 9784 
 9785 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9786 %{
 9787   predicate(!UseAPX);
 9788   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9789 
 9790   ins_cost(200); // XXX
 9791   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9792   ins_encode %{
 9793     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9794   %}
 9795   ins_pipe(pipe_cmov_mem); // XXX
 9796 %}
 9797 
 9798 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9799   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9800 
 9801   ins_cost(200);
 9802   expand %{
 9803     cmovL_memU(cop, cr, dst, src);
 9804   %}
 9805 %}
 9806 
 9807 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9808 %{
 9809   predicate(UseAPX);
 9810   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9811 
 9812   ins_cost(200);
 9813   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9814   ins_encode %{
 9815     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9816   %}
 9817   ins_pipe(pipe_cmov_mem);
 9818 %}
 9819 
 9820 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9821 %{
 9822   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9823 
 9824   ins_cost(200);
 9825   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9826   ins_encode %{
 9827     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9828   %}
 9829   ins_pipe(pipe_cmov_mem);
 9830 %}
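// There is no conditional move for XMM registers, so CMoveF/CMoveD are
// implemented as a short conditional branch over a movss/movsd; that is why
// these instructs use pipe_slow.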
 9831 
 9832 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9833 %{
 9834   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9835 
 9836   ins_cost(200); // XXX
 9837   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9838             "movss     $dst, $src\n"
 9839     "skip:" %}
 9840   ins_encode %{
 9841     Label Lskip;
 9842     // Invert sense of branch from sense of CMOV
 9843     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9844     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9845     __ bind(Lskip);
 9846   %}
 9847   ins_pipe(pipe_slow);
 9848 %}
 9849 
 9850 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9851 %{
 9852   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9853 
 9854   ins_cost(200); // XXX
 9855   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9856             "movss     $dst, $src\n"
 9857     "skip:" %}
 9858   ins_encode %{
 9859     Label Lskip;
 9860     // Invert sense of branch from sense of CMOV
 9861     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9862     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9863     __ bind(Lskip);
 9864   %}
 9865   ins_pipe(pipe_slow);
 9866 %}
 9867 
 9868 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9869   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9870 
 9871   ins_cost(200);
 9872   expand %{
 9873     cmovF_regU(cop, cr, dst, src);
 9874   %}
 9875 %}
 9876 
 9877 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9878 %{
 9879   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9880 
 9881   ins_cost(200); // XXX
 9882   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9883             "movss     $dst, $src\n"
 9884     "skip:" %}
 9885   ins_encode %{
 9886     Label Lskip;
 9887     // Invert sense of branch from sense of CMOV
 9888     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9889     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9890     __ bind(Lskip);
 9891   %}
 9892   ins_pipe(pipe_slow);
 9893 %}
 9894 
 9895 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9896 %{
 9897   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9898 
 9899   ins_cost(200); // XXX
 9900   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9901             "movsd     $dst, $src\n"
 9902     "skip:" %}
 9903   ins_encode %{
 9904     Label Lskip;
 9905     // Invert sense of branch from sense of CMOV
 9906     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9907     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9908     __ bind(Lskip);
 9909   %}
 9910   ins_pipe(pipe_slow);
 9911 %}
 9912 
 9913 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9914 %{
 9915   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9916 
 9917   ins_cost(200); // XXX
 9918   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9919             "movsd     $dst, $src\n"
 9920     "skip:" %}
 9921   ins_encode %{
 9922     Label Lskip;
 9923     // Invert sense of branch from sense of CMOV
 9924     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9925     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9926     __ bind(Lskip);
 9927   %}
 9928   ins_pipe(pipe_slow);
 9929 %}
 9930 
 9931 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9932   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9933 
 9934   ins_cost(200);
 9935   expand %{
 9936     cmovD_regU(cop, cr, dst, src);
 9937   %}
 9938 %}
 9939 
 9940 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9941 %{
 9942   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9943 
 9944   ins_cost(200); // XXX
 9945   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9946             "movsd     $dst, $src\n"
 9947     "skip:" %}
 9948   ins_encode %{
 9949     Label Lskip;
 9950     // Invert sense of branch from sense of CMOV
 9951     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9952     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9953     __ bind(Lskip);
 9954   %}
 9955   ins_pipe(pipe_slow);
 9956 %}
 9957 
 9958 //----------Arithmetic Instructions--------------------------------------------
 9959 //----------Addition Instructions----------------------------------------------
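// The flag() declarations below list the EFLAGS bits (OF/SF/ZF/CF/PF) that an
// instruction defines.  For the APX _ndd forms, the Flag_ndd_demotable_opr*
// hints and the trailing boolean passed to the e*() assembler calls are
// assumed here to control demotion to the shorter legacy encoding and
// suppression of flag updates; see the matcher and assembler headers for the
// authoritative meaning.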
 9960 
 9961 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9962 %{
 9963   predicate(!UseAPX);
 9964   match(Set dst (AddI dst src));
 9965   effect(KILL cr);
 9966   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9967   format %{ "addl    $dst, $src\t# int" %}
 9968   ins_encode %{
 9969     __ addl($dst$$Register, $src$$Register);
 9970   %}
 9971   ins_pipe(ialu_reg_reg);
 9972 %}
 9973 
 9974 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9975 %{
 9976   predicate(UseAPX);
 9977   match(Set dst (AddI src1 src2));
 9978   effect(KILL cr);
 9979   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9980 
 9981   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9982   ins_encode %{
 9983     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9984   %}
 9985   ins_pipe(ialu_reg_reg);
 9986 %}
 9987 
 9988 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9989 %{
 9990   predicate(!UseAPX);
 9991   match(Set dst (AddI dst src));
 9992   effect(KILL cr);
 9993   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9994 
 9995   format %{ "addl    $dst, $src\t# int" %}
 9996   ins_encode %{
 9997     __ addl($dst$$Register, $src$$constant);
 9998   %}
 9999   ins_pipe( ialu_reg );
10000 %}
10001 
10002 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10003 %{
10004   predicate(UseAPX);
10005   match(Set dst (AddI src1 src2));
10006   effect(KILL cr);
10007   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10008 
10009   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10010   ins_encode %{
10011     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
10012   %}
10013   ins_pipe( ialu_reg );
10014 %}
10015 
10016 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
10017 %{
10018   predicate(UseAPX);
10019   match(Set dst (AddI (LoadI src1) src2));
10020   effect(KILL cr);
10021   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10022 
10023   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10024   ins_encode %{
10025     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10026   %}
10027   ins_pipe( ialu_reg );
10028 %}
10029 
10030 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10031 %{
10032   predicate(!UseAPX);
10033   match(Set dst (AddI dst (LoadI src)));
10034   effect(KILL cr);
10035   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10036 
10037   ins_cost(150); // XXX
10038   format %{ "addl    $dst, $src\t# int" %}
10039   ins_encode %{
10040     __ addl($dst$$Register, $src$$Address);
10041   %}
10042   ins_pipe(ialu_reg_mem);
10043 %}
10044 
10045 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10046 %{
10047   predicate(UseAPX);
10048   match(Set dst (AddI src1 (LoadI src2)));
10049   effect(KILL cr);
10050   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10051 
10052   ins_cost(150);
10053   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10054   ins_encode %{
10055     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10056   %}
10057   ins_pipe(ialu_reg_mem);
10058 %}
10059 
10060 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10061 %{
10062   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10063   effect(KILL cr);
10064   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10065 
10066   ins_cost(150); // XXX
10067   format %{ "addl    $dst, $src\t# int" %}
10068   ins_encode %{
10069     __ addl($dst$$Address, $src$$Register);
10070   %}
10071   ins_pipe(ialu_mem_reg);
10072 %}
10073 
10074 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10075 %{
10076   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10077   effect(KILL cr);
10078   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10079 
10081   ins_cost(125); // XXX
10082   format %{ "addl    $dst, $src\t# int" %}
10083   ins_encode %{
10084     __ addl($dst$$Address, $src$$constant);
10085   %}
10086   ins_pipe(ialu_mem_imm);
10087 %}
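// When UseIncDec is set, AddI/AddL with a +1/-1 immediate is matched to
// inc/dec.  These have a slightly shorter encoding but leave CF unchanged,
// which is why the instructs below declare no flag effects.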
10088 
10089 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10090 %{
10091   predicate(!UseAPX && UseIncDec);
10092   match(Set dst (AddI dst src));
10093   effect(KILL cr);
10094 
10095   format %{ "incl    $dst\t# int" %}
10096   ins_encode %{
10097     __ incrementl($dst$$Register);
10098   %}
10099   ins_pipe(ialu_reg);
10100 %}
10101 
10102 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10103 %{
10104   predicate(UseAPX && UseIncDec);
10105   match(Set dst (AddI src val));
10106   effect(KILL cr);
10107   flag(PD::Flag_ndd_demotable_opr1);
10108 
10109   format %{ "eincl    $dst, $src\t# int ndd" %}
10110   ins_encode %{
10111     __ eincl($dst$$Register, $src$$Register, false);
10112   %}
10113   ins_pipe(ialu_reg);
10114 %}
10115 
10116 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10117 %{
10118   predicate(UseAPX && UseIncDec);
10119   match(Set dst (AddI (LoadI src) val));
10120   effect(KILL cr);
10121 
10122   format %{ "eincl    $dst, $src\t# int ndd" %}
10123   ins_encode %{
10124     __ eincl($dst$$Register, $src$$Address, false);
10125   %}
10126   ins_pipe(ialu_reg);
10127 %}
10128 
10129 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10130 %{
10131   predicate(UseIncDec);
10132   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10133   effect(KILL cr);
10134 
10135   ins_cost(125); // XXX
10136   format %{ "incl    $dst\t# int" %}
10137   ins_encode %{
10138     __ incrementl($dst$$Address);
10139   %}
10140   ins_pipe(ialu_mem_imm);
10141 %}
10142 
10143 // XXX why does that use AddI
10144 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10145 %{
10146   predicate(!UseAPX && UseIncDec);
10147   match(Set dst (AddI dst src));
10148   effect(KILL cr);
10149 
10150   format %{ "decl    $dst\t# int" %}
10151   ins_encode %{
10152     __ decrementl($dst$$Register);
10153   %}
10154   ins_pipe(ialu_reg);
10155 %}
10156 
10157 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10158 %{
10159   predicate(UseAPX && UseIncDec);
10160   match(Set dst (AddI src val));
10161   effect(KILL cr);
10162   flag(PD::Flag_ndd_demotable_opr1);
10163 
10164   format %{ "edecl    $dst, $src\t# int ndd" %}
10165   ins_encode %{
10166     __ edecl($dst$$Register, $src$$Register, false);
10167   %}
10168   ins_pipe(ialu_reg);
10169 %}
10170 
10171 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10172 %{
10173   predicate(UseAPX && UseIncDec);
10174   match(Set dst (AddI (LoadI src) val));
10175   effect(KILL cr);
10176 
10177   format %{ "edecl    $dst, $src\t# int ndd" %}
10178   ins_encode %{
10179     __ edecl($dst$$Register, $src$$Address, false);
10180   %}
10181   ins_pipe(ialu_reg);
10182 %}
10183 
10184 // XXX why does that use AddI
10185 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10186 %{
10187   predicate(UseIncDec);
10188   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10189   effect(KILL cr);
10190 
10191   ins_cost(125); // XXX
10192   format %{ "decl    $dst\t# int" %}
10193   ins_encode %{
10194     __ decrementl($dst$$Address);
10195   %}
10196   ins_pipe(ialu_mem_imm);
10197 %}
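// Strength-reduced address arithmetic: when the CPU computes two- and
// three-operand lea quickly, shift-add and add-add patterns are matched into
// a single lea, which also leaves the flags untouched.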
10198 
10199 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10200 %{
10201   predicate(VM_Version::supports_fast_2op_lea());
10202   match(Set dst (AddI (LShiftI index scale) disp));
10203 
10204   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10205   ins_encode %{
10206     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10207     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10208   %}
10209   ins_pipe(ialu_reg_reg);
10210 %}
10211 
10212 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10213 %{
10214   predicate(VM_Version::supports_fast_3op_lea());
10215   match(Set dst (AddI (AddI base index) disp));
10216 
10217   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10218   ins_encode %{
10219     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10220   %}
10221   ins_pipe(ialu_reg_reg);
10222 %}
10223 
10224 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10225 %{
10226   predicate(VM_Version::supports_fast_2op_lea());
10227   match(Set dst (AddI base (LShiftI index scale)));
10228 
10229   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10230   ins_encode %{
10231     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10232     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10233   %}
10234   ins_pipe(ialu_reg_reg);
10235 %}
10236 
10237 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10238 %{
10239   predicate(VM_Version::supports_fast_3op_lea());
10240   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10241 
10242   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10243   ins_encode %{
10244     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10245     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10246   %}
10247   ins_pipe(ialu_reg_reg);
10248 %}
10249 
10250 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10251 %{
10252   predicate(!UseAPX);
10253   match(Set dst (AddL dst src));
10254   effect(KILL cr);
10255   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10256 
10257   format %{ "addq    $dst, $src\t# long" %}
10258   ins_encode %{
10259     __ addq($dst$$Register, $src$$Register);
10260   %}
10261   ins_pipe(ialu_reg_reg);
10262 %}
10263 
10264 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10265 %{
10266   predicate(UseAPX);
10267   match(Set dst (AddL src1 src2));
10268   effect(KILL cr);
10269   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10270 
10271   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10272   ins_encode %{
10273     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10274   %}
10275   ins_pipe(ialu_reg_reg);
10276 %}
10277 
10278 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10279 %{
10280   predicate(!UseAPX);
10281   match(Set dst (AddL dst src));
10282   effect(KILL cr);
10283   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10284 
10285   format %{ "addq    $dst, $src\t# long" %}
10286   ins_encode %{
10287     __ addq($dst$$Register, $src$$constant);
10288   %}
10289   ins_pipe( ialu_reg );
10290 %}
10291 
10292 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10293 %{
10294   predicate(UseAPX);
10295   match(Set dst (AddL src1 src2));
10296   effect(KILL cr);
10297   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10298 
10299   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10300   ins_encode %{
10301     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10302   %}
10303   ins_pipe( ialu_reg );
10304 %}
10305 
10306 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10307 %{
10308   predicate(UseAPX);
10309   match(Set dst (AddL (LoadL src1) src2));
10310   effect(KILL cr);
10311   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10312 
10313   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10314   ins_encode %{
10315     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10316   %}
10317   ins_pipe( ialu_reg );
10318 %}
10319 
10320 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10321 %{
10322   predicate(!UseAPX);
10323   match(Set dst (AddL dst (LoadL src)));
10324   effect(KILL cr);
10325   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10326 
10327   ins_cost(150); // XXX
10328   format %{ "addq    $dst, $src\t# long" %}
10329   ins_encode %{
10330     __ addq($dst$$Register, $src$$Address);
10331   %}
10332   ins_pipe(ialu_reg_mem);
10333 %}
10334 
10335 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10336 %{
10337   predicate(UseAPX);
10338   match(Set dst (AddL src1 (LoadL src2)));
10339   effect(KILL cr);
10340   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10341 
10342   ins_cost(150);
10343   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10344   ins_encode %{
10345     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10346   %}
10347   ins_pipe(ialu_reg_mem);
10348 %}
10349 
10350 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10351 %{
10352   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10353   effect(KILL cr);
10354   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10355 
10356   ins_cost(150); // XXX
10357   format %{ "addq    $dst, $src\t# long" %}
10358   ins_encode %{
10359     __ addq($dst$$Address, $src$$Register);
10360   %}
10361   ins_pipe(ialu_mem_reg);
10362 %}
10363 
10364 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10365 %{
10366   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10367   effect(KILL cr);
10368   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10369 
10370   ins_cost(125); // XXX
10371   format %{ "addq    $dst, $src\t# long" %}
10372   ins_encode %{
10373     __ addq($dst$$Address, $src$$constant);
10374   %}
10375   ins_pipe(ialu_mem_imm);
10376 %}
10377 
10378 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10379 %{
10380   predicate(!UseAPX && UseIncDec);
10381   match(Set dst (AddL dst src));
10382   effect(KILL cr);
10383 
10384   format %{ "incq    $dst\t# long" %}
10385   ins_encode %{
10386     __ incrementq($dst$$Register);
10387   %}
10388   ins_pipe(ialu_reg);
10389 %}
10390 
10391 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10392 %{
10393   predicate(UseAPX && UseIncDec);
10394   match(Set dst (AddL src val));
10395   effect(KILL cr);
10396   flag(PD::Flag_ndd_demotable_opr1);
10397 
10398   format %{ "eincq    $dst, $src\t# long ndd" %}
10399   ins_encode %{
10400     __ eincq($dst$$Register, $src$$Register, false);
10401   %}
10402   ins_pipe(ialu_reg);
10403 %}
10404 
10405 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10406 %{
10407   predicate(UseAPX && UseIncDec);
10408   match(Set dst (AddL (LoadL src) val));
10409   effect(KILL cr);
10410 
10411   format %{ "eincq    $dst, $src\t# long ndd" %}
10412   ins_encode %{
10413     __ eincq($dst$$Register, $src$$Address, false);
10414   %}
10415   ins_pipe(ialu_reg);
10416 %}
10417 
10418 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10419 %{
10420   predicate(UseIncDec);
10421   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10422   effect(KILL cr);
10423 
10424   ins_cost(125); // XXX
10425   format %{ "incq    $dst\t# long" %}
10426   ins_encode %{
10427     __ incrementq($dst$$Address);
10428   %}
10429   ins_pipe(ialu_mem_imm);
10430 %}
10431 
10432 // XXX why does that use AddL
10433 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10434 %{
10435   predicate(!UseAPX && UseIncDec);
10436   match(Set dst (AddL dst src));
10437   effect(KILL cr);
10438 
10439   format %{ "decq    $dst\t# long" %}
10440   ins_encode %{
10441     __ decrementq($dst$$Register);
10442   %}
10443   ins_pipe(ialu_reg);
10444 %}
10445 
10446 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10447 %{
10448   predicate(UseAPX && UseIncDec);
10449   match(Set dst (AddL src val));
10450   effect(KILL cr);
10451   flag(PD::Flag_ndd_demotable_opr1);
10452 
10453   format %{ "edecq    $dst, $src\t# long ndd" %}
10454   ins_encode %{
10455     __ edecq($dst$$Register, $src$$Register, false);
10456   %}
10457   ins_pipe(ialu_reg);
10458 %}
10459 
10460 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10461 %{
10462   predicate(UseAPX && UseIncDec);
10463   match(Set dst (AddL (LoadL src) val));
10464   effect(KILL cr);
10465 
10466   format %{ "edecq    $dst, $src\t# long ndd" %}
10467   ins_encode %{
10468     __ edecq($dst$$Register, $src$$Address, false);
10469   %}
10470   ins_pipe(ialu_reg);
10471 %}
10472 
10473 // XXX why does that use AddL
10474 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10475 %{
10476   predicate(UseIncDec);
10477   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10478   effect(KILL cr);
10479 
10480   ins_cost(125); // XXX
10481   format %{ "decq    $dst\t# long" %}
10482   ins_encode %{
10483     __ decrementq($dst$$Address);
10484   %}
10485   ins_pipe(ialu_mem_imm);
10486 %}
10487 
10488 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10489 %{
10490   predicate(VM_Version::supports_fast_2op_lea());
10491   match(Set dst (AddL (LShiftL index scale) disp));
10492 
10493   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10494   ins_encode %{
10495     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10496     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10497   %}
10498   ins_pipe(ialu_reg_reg);
10499 %}
10500 
10501 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10502 %{
10503   predicate(VM_Version::supports_fast_3op_lea());
10504   match(Set dst (AddL (AddL base index) disp));
10505 
10506   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10507   ins_encode %{
10508     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10509   %}
10510   ins_pipe(ialu_reg_reg);
10511 %}
10512 
10513 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10514 %{
10515   predicate(VM_Version::supports_fast_2op_lea());
10516   match(Set dst (AddL base (LShiftL index scale)));
10517 
10518   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10519   ins_encode %{
10520     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10521     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10522   %}
10523   ins_pipe(ialu_reg_reg);
10524 %}
10525 
10526 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10527 %{
10528   predicate(VM_Version::supports_fast_3op_lea());
10529   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10530 
10531   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10532   ins_encode %{
10533     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10534     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10535   %}
10536   ins_pipe(ialu_reg_reg);
10537 %}
10538 
10539 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10540 %{
10541   match(Set dst (AddP dst src));
10542   effect(KILL cr);
10543   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10544 
10545   format %{ "addq    $dst, $src\t# ptr" %}
10546   ins_encode %{
10547     __ addq($dst$$Register, $src$$Register);
10548   %}
10549   ins_pipe(ialu_reg_reg);
10550 %}
10551 
10552 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10553 %{
10554   match(Set dst (AddP dst src));
10555   effect(KILL cr);
10556   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10557 
10558   format %{ "addq    $dst, $src\t# ptr" %}
10559   ins_encode %{
10560     __ addq($dst$$Register, $src$$constant);
10561   %}
10562   ins_pipe( ialu_reg );
10563 %}
10564 
10565 // XXX addP mem ops ????
10566 
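      // The cast rules below are compile-time type/range assertions: with size(0)
      // and an empty encoding they emit no code.  When VerifyConstraintCasts is
      // enabled, the checked variants instead emit a runtime range verification.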
10567 instruct checkCastPP(rRegP dst)
10568 %{
10569   match(Set dst (CheckCastPP dst));
10570 
10571   size(0);
10572   format %{ "# checkcastPP of $dst" %}
10573   ins_encode(/* empty encoding */);
10574   ins_pipe(empty);
10575 %}
10576 
10577 instruct castPP(rRegP dst)
10578 %{
10579   match(Set dst (CastPP dst));
10580 
10581   size(0);
10582   format %{ "# castPP of $dst" %}
10583   ins_encode(/* empty encoding */);
10584   ins_pipe(empty);
10585 %}
10586 
10587 instruct castII(rRegI dst)
10588 %{
10589   predicate(VerifyConstraintCasts == 0);
10590   match(Set dst (CastII dst));
10591 
10592   size(0);
10593   format %{ "# castII of $dst" %}
10594   ins_encode(/* empty encoding */);
10595   ins_cost(0);
10596   ins_pipe(empty);
10597 %}
10598 
10599 instruct castII_checked(rRegI dst, rFlagsReg cr)
10600 %{
10601   predicate(VerifyConstraintCasts > 0);
10602   match(Set dst (CastII dst));
10603 
10604   effect(KILL cr);
10605   format %{ "# cast_checked_II $dst" %}
10606   ins_encode %{
10607     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10608   %}
10609   ins_pipe(pipe_slow);
10610 %}
10611 
10612 instruct castLL(rRegL dst)
10613 %{
10614   predicate(VerifyConstraintCasts == 0);
10615   match(Set dst (CastLL dst));
10616 
10617   size(0);
10618   format %{ "# castLL of $dst" %}
10619   ins_encode(/* empty encoding */);
10620   ins_cost(0);
10621   ins_pipe(empty);
10622 %}
10623 
10624 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10625 %{
10626   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10627   match(Set dst (CastLL dst));
10628 
10629   effect(KILL cr);
10630   format %{ "# cast_checked_LL $dst" %}
10631   ins_encode %{
10632     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10633   %}
10634   ins_pipe(pipe_slow);
10635 %}
10636 
10637 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10638 %{
10639   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10640   match(Set dst (CastLL dst));
10641 
10642   effect(KILL cr, TEMP tmp);
10643   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10644   ins_encode %{
10645     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10646   %}
10647   ins_pipe(pipe_slow);
10648 %}
10649 
10650 instruct castFF(regF dst)
10651 %{
10652   match(Set dst (CastFF dst));
10653 
10654   size(0);
10655   format %{ "# castFF of $dst" %}
10656   ins_encode(/* empty encoding */);
10657   ins_cost(0);
10658   ins_pipe(empty);
10659 %}
10660 
10661 instruct castHH(regF dst)
10662 %{
10663   match(Set dst (CastHH dst));
10664 
10665   size(0);
10666   format %{ "# castHH of $dst" %}
10667   ins_encode(/* empty encoding */);
10668   ins_cost(0);
10669   ins_pipe(empty);
10670 %}
10671 
10672 instruct castDD(regD dst)
10673 %{
10674   match(Set dst (CastDD dst));
10675 
10676   size(0);
10677   format %{ "# castDD of $dst" %}
10678   ins_encode(/* empty encoding */);
10679   ins_cost(0);
10680   ins_pipe(empty);
10681 %}
10682 
10683 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
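      // lock cmpxchg compares rax (oldval) with the memory operand; on a match it
      // stores newval and sets ZF, otherwise it loads the current value into rax.
      // setcc then materializes the success flag as a 0/1 result register.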
10684 instruct compareAndSwapP(rRegI res,
10685                          memory mem_ptr,
10686                          rax_RegP oldval, rRegP newval,
10687                          rFlagsReg cr)
10688 %{
10689   predicate(n->as_LoadStore()->barrier_data() == 0);
10690   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10691   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10692   effect(KILL cr, KILL oldval);
10693 
10694   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10695             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10696             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10697   ins_encode %{
10698     __ lock();
10699     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10700     __ setcc(Assembler::equal, $res$$Register);
10701   %}
10702   ins_pipe( pipe_cmpxchg );
10703 %}
10704 
10705 instruct compareAndSwapL(rRegI res,
10706                          memory mem_ptr,
10707                          rax_RegL oldval, rRegL newval,
10708                          rFlagsReg cr)
10709 %{
10710   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10711   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10712   effect(KILL cr, KILL oldval);
10713 
10714   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10715             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10716             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10717   ins_encode %{
10718     __ lock();
10719     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10720     __ setcc(Assembler::equal, $res$$Register);
10721   %}
10722   ins_pipe( pipe_cmpxchg );
10723 %}
10724 
10725 instruct compareAndSwapI(rRegI res,
10726                          memory mem_ptr,
10727                          rax_RegI oldval, rRegI newval,
10728                          rFlagsReg cr)
10729 %{
10730   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10731   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10732   effect(KILL cr, KILL oldval);
10733 
10734   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10735             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10736             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10737   ins_encode %{
10738     __ lock();
10739     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10740     __ setcc(Assembler::equal, $res$$Register);
10741   %}
10742   ins_pipe( pipe_cmpxchg );
10743 %}
10744 
10745 instruct compareAndSwapB(rRegI res,
10746                          memory mem_ptr,
10747                          rax_RegI oldval, rRegI newval,
10748                          rFlagsReg cr)
10749 %{
10750   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10751   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10752   effect(KILL cr, KILL oldval);
10753 
10754   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10755             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10756             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10757   ins_encode %{
10758     __ lock();
10759     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10760     __ setcc(Assembler::equal, $res$$Register);
10761   %}
10762   ins_pipe( pipe_cmpxchg );
10763 %}
10764 
10765 instruct compareAndSwapS(rRegI res,
10766                          memory mem_ptr,
10767                          rax_RegI oldval, rRegI newval,
10768                          rFlagsReg cr)
10769 %{
10770   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10771   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10772   effect(KILL cr, KILL oldval);
10773 
10774   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10775             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10776             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10777   ins_encode %{
10778     __ lock();
10779     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10780     __ setcc(Assembler::equal, $res$$Register);
10781   %}
10782   ins_pipe( pipe_cmpxchg );
10783 %}
10784 
10785 instruct compareAndSwapN(rRegI res,
10786                           memory mem_ptr,
10787                           rax_RegN oldval, rRegN newval,
10788                           rFlagsReg cr) %{
10789   predicate(n->as_LoadStore()->barrier_data() == 0);
10790   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10791   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10792   effect(KILL cr, KILL oldval);
10793 
10794   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10795             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10796             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10797   ins_encode %{
10798     __ lock();
10799     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10800     __ setcc(Assembler::equal, $res$$Register);
10801   %}
10802   ins_pipe( pipe_cmpxchg );
10803 %}
10804 
10805 instruct compareAndExchangeB(
10806                          memory mem_ptr,
10807                          rax_RegI oldval, rRegI newval,
10808                          rFlagsReg cr)
10809 %{
10810   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10811   effect(KILL cr);
10812 
10813   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10814             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10815   ins_encode %{
10816     __ lock();
10817     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10818   %}
10819   ins_pipe( pipe_cmpxchg );
10820 %}
10821 
10822 instruct compareAndExchangeS(
10823                          memory mem_ptr,
10824                          rax_RegI oldval, rRegI newval,
10825                          rFlagsReg cr)
10826 %{
10827   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10828   effect(KILL cr);
10829 
10830   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10831             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10832   ins_encode %{
10833     __ lock();
10834     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10835   %}
10836   ins_pipe( pipe_cmpxchg );
10837 %}
10838 
10839 instruct compareAndExchangeI(
10840                          memory mem_ptr,
10841                          rax_RegI oldval, rRegI newval,
10842                          rFlagsReg cr)
10843 %{
10844   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10845   effect(KILL cr);
10846 
10847   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10848             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10849   ins_encode %{
10850     __ lock();
10851     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10852   %}
10853   ins_pipe( pipe_cmpxchg );
10854 %}
10855 
10856 instruct compareAndExchangeL(
10857                          memory mem_ptr,
10858                          rax_RegL oldval, rRegL newval,
10859                          rFlagsReg cr)
10860 %{
10861   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10862   effect(KILL cr);
10863 
10864   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10865             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10866   ins_encode %{
10867     __ lock();
10868     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10869   %}
10870   ins_pipe( pipe_cmpxchg );
10871 %}
10872 
10873 instruct compareAndExchangeN(
10874                           memory mem_ptr,
10875                           rax_RegN oldval, rRegN newval,
10876                           rFlagsReg cr) %{
10877   predicate(n->as_LoadStore()->barrier_data() == 0);
10878   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10879   effect(KILL cr);
10880 
10881   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10882             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10883   ins_encode %{
10884     __ lock();
10885     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10886   %}
10887   ins_pipe( pipe_cmpxchg );
10888 %}
10889 
10890 instruct compareAndExchangeP(
10891                          memory mem_ptr,
10892                          rax_RegP oldval, rRegP newval,
10893                          rFlagsReg cr)
10894 %{
10895   predicate(n->as_LoadStore()->barrier_data() == 0);
10896   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10897   effect(KILL cr);
10898 
10899   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10900             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10901   ins_encode %{
10902     __ lock();
10903     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10904   %}
10905   ins_pipe( pipe_cmpxchg );
10906 %}
10907 
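      // GetAndAdd: when the fetched old value is unused (result_not_used), a plain
      // locked add is emitted instead of the more expensive lock xadd.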
10908 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10909   predicate(n->as_LoadStore()->result_not_used());
10910   match(Set dummy (GetAndAddB mem add));
10911   effect(KILL cr);
10912   format %{ "addb_lock   $mem, $add" %}
10913   ins_encode %{
10914     __ lock();
10915     __ addb($mem$$Address, $add$$Register);
10916   %}
10917   ins_pipe(pipe_cmpxchg);
10918 %}
10919 
10920 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10921   predicate(n->as_LoadStore()->result_not_used());
10922   match(Set dummy (GetAndAddB mem add));
10923   effect(KILL cr);
10924   format %{ "addb_lock   $mem, $add" %}
10925   ins_encode %{
10926     __ lock();
10927     __ addb($mem$$Address, $add$$constant);
10928   %}
10929   ins_pipe(pipe_cmpxchg);
10930 %}
10931 
10932 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10933   predicate(!n->as_LoadStore()->result_not_used());
10934   match(Set newval (GetAndAddB mem newval));
10935   effect(KILL cr);
10936   format %{ "xaddb_lock  $mem, $newval" %}
10937   ins_encode %{
10938     __ lock();
10939     __ xaddb($mem$$Address, $newval$$Register);
10940   %}
10941   ins_pipe(pipe_cmpxchg);
10942 %}
10943 
10944 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10945   predicate(n->as_LoadStore()->result_not_used());
10946   match(Set dummy (GetAndAddS mem add));
10947   effect(KILL cr);
10948   format %{ "addw_lock   $mem, $add" %}
10949   ins_encode %{
10950     __ lock();
10951     __ addw($mem$$Address, $add$$Register);
10952   %}
10953   ins_pipe(pipe_cmpxchg);
10954 %}
10955 
10956 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10957   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10958   match(Set dummy (GetAndAddS mem add));
10959   effect(KILL cr);
10960   format %{ "addw_lock   $mem, $add" %}
10961   ins_encode %{
10962     __ lock();
10963     __ addw($mem$$Address, $add$$constant);
10964   %}
10965   ins_pipe(pipe_cmpxchg);
10966 %}
10967 
10968 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10969   predicate(!n->as_LoadStore()->result_not_used());
10970   match(Set newval (GetAndAddS mem newval));
10971   effect(KILL cr);
10972   format %{ "xaddw_lock  $mem, $newval" %}
10973   ins_encode %{
10974     __ lock();
10975     __ xaddw($mem$$Address, $newval$$Register);
10976   %}
10977   ins_pipe(pipe_cmpxchg);
10978 %}
10979 
10980 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10981   predicate(n->as_LoadStore()->result_not_used());
10982   match(Set dummy (GetAndAddI mem add));
10983   effect(KILL cr);
10984   format %{ "addl_lock   $mem, $add" %}
10985   ins_encode %{
10986     __ lock();
10987     __ addl($mem$$Address, $add$$Register);
10988   %}
10989   ins_pipe(pipe_cmpxchg);
10990 %}
10991 
10992 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10993   predicate(n->as_LoadStore()->result_not_used());
10994   match(Set dummy (GetAndAddI mem add));
10995   effect(KILL cr);
10996   format %{ "addl_lock   $mem, $add" %}
10997   ins_encode %{
10998     __ lock();
10999     __ addl($mem$$Address, $add$$constant);
11000   %}
11001   ins_pipe(pipe_cmpxchg);
11002 %}
11003 
11004 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
11005   predicate(!n->as_LoadStore()->result_not_used());
11006   match(Set newval (GetAndAddI mem newval));
11007   effect(KILL cr);
11008   format %{ "xaddl_lock  $mem, $newval" %}
11009   ins_encode %{
11010     __ lock();
11011     __ xaddl($mem$$Address, $newval$$Register);
11012   %}
11013   ins_pipe(pipe_cmpxchg);
11014 %}
11015 
11016 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11017   predicate(n->as_LoadStore()->result_not_used());
11018   match(Set dummy (GetAndAddL mem add));
11019   effect(KILL cr);
11020   format %{ "addq_lock   $mem, $add" %}
11021   ins_encode %{
11022     __ lock();
11023     __ addq($mem$$Address, $add$$Register);
11024   %}
11025   ins_pipe(pipe_cmpxchg);
11026 %}
11027 
11028 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11029   predicate(n->as_LoadStore()->result_not_used());
11030   match(Set dummy (GetAndAddL mem add));
11031   effect(KILL cr);
11032   format %{ "addq_lock   $mem, $add" %}
11033   ins_encode %{
11034     __ lock();
11035     __ addq($mem$$Address, $add$$constant);
11036   %}
11037   ins_pipe(pipe_cmpxchg);
11038 %}
11039 
11040 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11041   predicate(!n->as_LoadStore()->result_not_used());
11042   match(Set newval (GetAndAddL mem newval));
11043   effect(KILL cr);
11044   format %{ "xaddq_lock  $mem, $newval" %}
11045   ins_encode %{
11046     __ lock();
11047     __ xaddq($mem$$Address, $newval$$Register);
11048   %}
11049   ins_pipe(pipe_cmpxchg);
11050 %}
11051 
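      // GetAndSet: xchg with a memory operand is implicitly locked and does not
      // modify the flags, so no lock prefix and no rFlagsReg effect is needed.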
11052 instruct xchgB( memory mem, rRegI newval) %{
11053   match(Set newval (GetAndSetB mem newval));
11054   format %{ "XCHGB  $newval,[$mem]" %}
11055   ins_encode %{
11056     __ xchgb($newval$$Register, $mem$$Address);
11057   %}
11058   ins_pipe( pipe_cmpxchg );
11059 %}
11060 
11061 instruct xchgS( memory mem, rRegI newval) %{
11062   match(Set newval (GetAndSetS mem newval));
11063   format %{ "XCHGW  $newval,[$mem]" %}
11064   ins_encode %{
11065     __ xchgw($newval$$Register, $mem$$Address);
11066   %}
11067   ins_pipe( pipe_cmpxchg );
11068 %}
11069 
11070 instruct xchgI( memory mem, rRegI newval) %{
11071   match(Set newval (GetAndSetI mem newval));
11072   format %{ "XCHGL  $newval,[$mem]" %}
11073   ins_encode %{
11074     __ xchgl($newval$$Register, $mem$$Address);
11075   %}
11076   ins_pipe( pipe_cmpxchg );
11077 %}
11078 
11079 instruct xchgL( memory mem, rRegL newval) %{
11080   match(Set newval (GetAndSetL mem newval));
11081   format %{ "XCHGQ  $newval,[$mem]" %}
11082   ins_encode %{
11083     __ xchgq($newval$$Register, $mem$$Address);
11084   %}
11085   ins_pipe( pipe_cmpxchg );
11086 %}
11087 
11088 instruct xchgP( memory mem, rRegP newval) %{
11089   match(Set newval (GetAndSetP mem newval));
11090   predicate(n->as_LoadStore()->barrier_data() == 0);
11091   format %{ "XCHGQ  $newval,[$mem]" %}
11092   ins_encode %{
11093     __ xchgq($newval$$Register, $mem$$Address);
11094   %}
11095   ins_pipe( pipe_cmpxchg );
11096 %}
11097 
11098 instruct xchgN( memory mem, rRegN newval) %{
11099   predicate(n->as_LoadStore()->barrier_data() == 0);
11100   match(Set newval (GetAndSetN mem newval));
11101   format %{ "XCHGL  $newval,[$mem]" %}
11102   ins_encode %{
11103     __ xchgl($newval$$Register, $mem$$Address);
11104   %}
11105   ins_pipe( pipe_cmpxchg );
11106 %}
11107 
11108 //----------Abs Instructions-------------------------------------------
11109 
11110 // Integer Absolute Instructions
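      // Branch-free abs: dst = 0 - src sets the flags, then cmov keeps the original
      // src when the negation came out negative (i.e. src > 0).  MIN_VALUE maps to
      // itself, matching Math.abs semantics.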
11111 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11112 %{
11113   match(Set dst (AbsI src));
11114   effect(TEMP dst, KILL cr);
11115   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11116             "subl    $dst, $src\n\t"
11117             "cmovll  $dst, $src" %}
11118   ins_encode %{
11119     __ xorl($dst$$Register, $dst$$Register);
11120     __ subl($dst$$Register, $src$$Register);
11121     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11122   %}
11123 
11124   ins_pipe(ialu_reg_reg);
11125 %}
11126 
11127 // Long Absolute Instructions
11128 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11129 %{
11130   match(Set dst (AbsL src));
11131   effect(TEMP dst, KILL cr);
11132   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11133             "subq    $dst, $src\n\t"
11134             "cmovlq  $dst, $src" %}
11135   ins_encode %{
11136     __ xorl($dst$$Register, $dst$$Register);
11137     __ subq($dst$$Register, $src$$Register);
11138     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11139   %}
11140 
11141   ins_pipe(ialu_reg_reg);
11142 %}
11143 
11144 //----------Subtraction Instructions-------------------------------------------
11145 
11146 // Integer Subtraction Instructions
11147 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11148 %{
11149   predicate(!UseAPX);
11150   match(Set dst (SubI dst src));
11151   effect(KILL cr);
11152   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11153 
11154   format %{ "subl    $dst, $src\t# int" %}
11155   ins_encode %{
11156     __ subl($dst$$Register, $src$$Register);
11157   %}
11158   ins_pipe(ialu_reg_reg);
11159 %}
11160 
11161 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11162 %{
11163   predicate(UseAPX);
11164   match(Set dst (SubI src1 src2));
11165   effect(KILL cr);
11166   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11167 
11168   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11169   ins_encode %{
11170     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11171   %}
11172   ins_pipe(ialu_reg_reg);
11173 %}
11174 
11175 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11176 %{
11177   predicate(UseAPX);
11178   match(Set dst (SubI src1 src2));
11179   effect(KILL cr);
11180   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11181 
11182   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11183   ins_encode %{
11184     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11185   %}
11186   ins_pipe(ialu_reg_reg);
11187 %}
11188 
11189 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11190 %{
11191   predicate(UseAPX);
11192   match(Set dst (SubI (LoadI src1) src2));
11193   effect(KILL cr);
11194   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11195 
11196   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11197   ins_encode %{
11198     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11199   %}
11200   ins_pipe(ialu_reg_reg);
11201 %}
11202 
11203 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11204 %{
11205   predicate(!UseAPX);
11206   match(Set dst (SubI dst (LoadI src)));
11207   effect(KILL cr);
11208   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11209 
11210   ins_cost(150);
11211   format %{ "subl    $dst, $src\t# int" %}
11212   ins_encode %{
11213     __ subl($dst$$Register, $src$$Address);
11214   %}
11215   ins_pipe(ialu_reg_mem);
11216 %}
11217 
11218 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11219 %{
11220   predicate(UseAPX);
11221   match(Set dst (SubI src1 (LoadI src2)));
11222   effect(KILL cr);
11223   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11224 
11225   ins_cost(150);
11226   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11227   ins_encode %{
11228     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11229   %}
11230   ins_pipe(ialu_reg_mem);
11231 %}
11232 
11233 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11234 %{
11235   predicate(UseAPX);
11236   match(Set dst (SubI (LoadI src1) src2));
11237   effect(KILL cr);
11238   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11239 
11240   ins_cost(150);
11241   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11242   ins_encode %{
11243     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11244   %}
11245   ins_pipe(ialu_reg_mem);
11246 %}
11247 
11248 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11249 %{
11250   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11251   effect(KILL cr);
11252   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11253 
11254   ins_cost(150);
11255   format %{ "subl    $dst, $src\t# int" %}
11256   ins_encode %{
11257     __ subl($dst$$Address, $src$$Register);
11258   %}
11259   ins_pipe(ialu_mem_reg);
11260 %}
11261 
11262 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11263 %{
11264   predicate(!UseAPX);
11265   match(Set dst (SubL dst src));
11266   effect(KILL cr);
11267   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11268 
11269   format %{ "subq    $dst, $src\t# long" %}
11270   ins_encode %{
11271     __ subq($dst$$Register, $src$$Register);
11272   %}
11273   ins_pipe(ialu_reg_reg);
11274 %}
11275 
11276 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11277 %{
11278   predicate(UseAPX);
11279   match(Set dst (SubL src1 src2));
11280   effect(KILL cr);
11281   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11282 
11283   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11284   ins_encode %{
11285     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11286   %}
11287   ins_pipe(ialu_reg_reg);
11288 %}
11289 
11290 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11291 %{
11292   predicate(UseAPX);
11293   match(Set dst (SubL src1 src2));
11294   effect(KILL cr);
11295   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11296 
11297   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11298   ins_encode %{
11299     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11300   %}
11301   ins_pipe(ialu_reg_reg);
11302 %}
11303 
11304 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11305 %{
11306   predicate(UseAPX);
11307   match(Set dst (SubL (LoadL src1) src2));
11308   effect(KILL cr);
11309   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11310 
11311   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11312   ins_encode %{
11313     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11314   %}
11315   ins_pipe(ialu_reg_reg);
11316 %}
11317 
11318 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11319 %{
11320   predicate(!UseAPX);
11321   match(Set dst (SubL dst (LoadL src)));
11322   effect(KILL cr);
11323   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11324 
11325   ins_cost(150);
11326   format %{ "subq    $dst, $src\t# long" %}
11327   ins_encode %{
11328     __ subq($dst$$Register, $src$$Address);
11329   %}
11330   ins_pipe(ialu_reg_mem);
11331 %}
11332 
11333 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11334 %{
11335   predicate(UseAPX);
11336   match(Set dst (SubL src1 (LoadL src2)));
11337   effect(KILL cr);
11338   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11339 
11340   ins_cost(150);
11341   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11342   ins_encode %{
11343     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11344   %}
11345   ins_pipe(ialu_reg_mem);
11346 %}
11347 
11348 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11349 %{
11350   predicate(UseAPX);
11351   match(Set dst (SubL (LoadL src1) src2));
11352   effect(KILL cr);
11353   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11354 
11355   ins_cost(150);
11356   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11357   ins_encode %{
11358     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11359   %}
11360   ins_pipe(ialu_reg_mem);
11361 %}
11362 
11363 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11364 %{
11365   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11366   effect(KILL cr);
11367   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11368 
11369   ins_cost(150);
11370   format %{ "subq    $dst, $src\t# long" %}
11371   ins_encode %{
11372     __ subq($dst$$Address, $src$$Register);
11373   %}
11374   ins_pipe(ialu_mem_reg);
11375 %}
11376 
11377 // Subtract from a pointer
11378 // XXX hmpf???
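      // (There is no pointer-subtract ideal node; "ptr - x" reaches the matcher as
      // AddP of the negated int offset, which is the shape matched here.)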
11379 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11380 %{
11381   match(Set dst (AddP dst (SubI zero src)));
11382   effect(KILL cr);
11383 
11384   format %{ "subq    $dst, $src\t# ptr - int" %}
11385   ins_encode %{
11386     __ subq($dst$$Register, $src$$Register);
11387   %}
11388   ins_pipe(ialu_reg_reg);
11389 %}
11390 
11391 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11392 %{
11393   predicate(!UseAPX);
11394   match(Set dst (SubI zero dst));
11395   effect(KILL cr);
11396   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11397 
11398   format %{ "negl    $dst\t# int" %}
11399   ins_encode %{
11400     __ negl($dst$$Register);
11401   %}
11402   ins_pipe(ialu_reg);
11403 %}
11404 
11405 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11406 %{
11407   predicate(UseAPX);
11408   match(Set dst (SubI zero src));
11409   effect(KILL cr);
11410   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11411 
11412   format %{ "enegl    $dst, $src\t# int ndd" %}
11413   ins_encode %{
11414     __ enegl($dst$$Register, $src$$Register, false);
11415   %}
11416   ins_pipe(ialu_reg);
11417 %}
11418 
11419 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11420 %{
11421   predicate(!UseAPX);
11422   match(Set dst (NegI dst));
11423   effect(KILL cr);
11424   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11425 
11426   format %{ "negl    $dst\t# int" %}
11427   ins_encode %{
11428     __ negl($dst$$Register);
11429   %}
11430   ins_pipe(ialu_reg);
11431 %}
11432 
11433 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11434 %{
11435   predicate(UseAPX);
11436   match(Set dst (NegI src));
11437   effect(KILL cr);
11438   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11439 
11440   format %{ "enegl    $dst, $src\t# int ndd" %}
11441   ins_encode %{
11442     __ enegl($dst$$Register, $src$$Register, false);
11443   %}
11444   ins_pipe(ialu_reg);
11445 %}
11446 
11447 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11448 %{
11449   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11450   effect(KILL cr);
11451   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11452 
11453   format %{ "negl    $dst\t# int" %}
11454   ins_encode %{
11455     __ negl($dst$$Address);
11456   %}
11457   ins_pipe(ialu_reg);
11458 %}
11459 
11460 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11461 %{
11462   predicate(!UseAPX);
11463   match(Set dst (SubL zero dst));
11464   effect(KILL cr);
11465   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11466 
11467   format %{ "negq    $dst\t# long" %}
11468   ins_encode %{
11469     __ negq($dst$$Register);
11470   %}
11471   ins_pipe(ialu_reg);
11472 %}
11473 
11474 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11475 %{
11476   predicate(UseAPX);
11477   match(Set dst (SubL zero src));
11478   effect(KILL cr);
11479   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11480 
11481   format %{ "enegq    $dst, $src\t# long ndd" %}
11482   ins_encode %{
11483     __ enegq($dst$$Register, $src$$Register, false);
11484   %}
11485   ins_pipe(ialu_reg);
11486 %}
11487 
11488 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11489 %{
11490   predicate(!UseAPX);
11491   match(Set dst (NegL dst));
11492   effect(KILL cr);
11493   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11494 
11495   format %{ "negq    $dst\t# long" %}
11496   ins_encode %{
11497     __ negq($dst$$Register);
11498   %}
11499   ins_pipe(ialu_reg);
11500 %}
11501 
11502 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11503 %{
11504   predicate(UseAPX);
11505   match(Set dst (NegL src));
11506   effect(KILL cr);
11507   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11508 
11509   format %{ "enegq    $dst, $src\t# long ndd" %}
11510   ins_encode %{
11511     __ enegq($dst$$Register, $src$$Register, false);
11512   %}
11513   ins_pipe(ialu_reg);
11514 %}
11515 
11516 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11517 %{
11518   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11519   effect(KILL cr);
11520   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11521 
11522   format %{ "negq    $dst\t# long" %}
11523   ins_encode %{
11524     __ negq($dst$$Address);
11525   %}
11526   ins_pipe(ialu_reg);
11527 %}
11528 
11529 //----------Multiplication/Division Instructions-------------------------------
11530 // Integer Multiplication Instructions
11531 // Multiply Register
11532 
11533 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11534 %{
11535   predicate(!UseAPX);
11536   match(Set dst (MulI dst src));
11537   effect(KILL cr);
11538 
11539   ins_cost(300);
11540   format %{ "imull   $dst, $src\t# int" %}
11541   ins_encode %{
11542     __ imull($dst$$Register, $src$$Register);
11543   %}
11544   ins_pipe(ialu_reg_reg_alu0);
11545 %}
11546 
11547 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11548 %{
11549   predicate(UseAPX);
11550   match(Set dst (MulI src1 src2));
11551   effect(KILL cr);
11552   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11553 
11554   ins_cost(300);
11555   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11556   ins_encode %{
11557     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11558   %}
11559   ins_pipe(ialu_reg_reg_alu0);
11560 %}
11561 
11562 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11563 %{
11564   match(Set dst (MulI src imm));
11565   effect(KILL cr);
11566 
11567   ins_cost(300);
11568   format %{ "imull   $dst, $src, $imm\t# int" %}
11569   ins_encode %{
11570     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11571   %}
11572   ins_pipe(ialu_reg_reg_alu0);
11573 %}
11574 
11575 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11576 %{
11577   predicate(!UseAPX);
11578   match(Set dst (MulI dst (LoadI src)));
11579   effect(KILL cr);
11580 
11581   ins_cost(350);
11582   format %{ "imull   $dst, $src\t# int" %}
11583   ins_encode %{
11584     __ imull($dst$$Register, $src$$Address);
11585   %}
11586   ins_pipe(ialu_reg_mem_alu0);
11587 %}
11588 
11589 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11590 %{
11591   predicate(UseAPX);
11592   match(Set dst (MulI src1 (LoadI src2)));
11593   effect(KILL cr);
11594   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11595 
11596   ins_cost(350);
11597   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11598   ins_encode %{
11599     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11600   %}
11601   ins_pipe(ialu_reg_mem_alu0);
11602 %}
11603 
11604 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11605 %{
11606   match(Set dst (MulI (LoadI src) imm));
11607   effect(KILL cr);
11608 
11609   ins_cost(300);
11610   format %{ "imull   $dst, $src, $imm\t# int" %}
11611   ins_encode %{
11612     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11613   %}
11614   ins_pipe(ialu_reg_mem_alu0);
11615 %}
11616 
11617 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11618 %{
11619   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11620   effect(KILL cr, KILL src2);
11621 
11622   expand %{ mulI_rReg(dst, src1, cr);
11623             mulI_rReg(src2, src3, cr);
11624             addI_rReg(dst, src2, cr); %}
11625 %}
11626 
11627 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11628 %{
11629   predicate(!UseAPX);
11630   match(Set dst (MulL dst src));
11631   effect(KILL cr);
11632 
11633   ins_cost(300);
11634   format %{ "imulq   $dst, $src\t# long" %}
11635   ins_encode %{
11636     __ imulq($dst$$Register, $src$$Register);
11637   %}
11638   ins_pipe(ialu_reg_reg_alu0);
11639 %}
11640 
11641 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11642 %{
11643   predicate(UseAPX);
11644   match(Set dst (MulL src1 src2));
11645   effect(KILL cr);
11646   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11647 
11648   ins_cost(300);
11649   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11650   ins_encode %{
11651     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11652   %}
11653   ins_pipe(ialu_reg_reg_alu0);
11654 %}
11655 
11656 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11657 %{
11658   match(Set dst (MulL src imm));
11659   effect(KILL cr);
11660 
11661   ins_cost(300);
11662   format %{ "imulq   $dst, $src, $imm\t# long" %}
11663   ins_encode %{
11664     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11665   %}
11666   ins_pipe(ialu_reg_reg_alu0);
11667 %}
11668 
11669 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11670 %{
11671   predicate(!UseAPX);
11672   match(Set dst (MulL dst (LoadL src)));
11673   effect(KILL cr);
11674 
11675   ins_cost(350);
11676   format %{ "imulq   $dst, $src\t# long" %}
11677   ins_encode %{
11678     __ imulq($dst$$Register, $src$$Address);
11679   %}
11680   ins_pipe(ialu_reg_mem_alu0);
11681 %}
11682 
11683 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11684 %{
11685   predicate(UseAPX);
11686   match(Set dst (MulL src1 (LoadL src2)));
11687   effect(KILL cr);
11688   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11689 
11690   ins_cost(350);
11691   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11692   ins_encode %{
11693     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11694   %}
11695   ins_pipe(ialu_reg_mem_alu0);
11696 %}
11697 
11698 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11699 %{
11700   match(Set dst (MulL (LoadL src) imm));
11701   effect(KILL cr);
11702 
11703   ins_cost(300);
11704   format %{ "imulq   $dst, $src, $imm\t# long" %}
11705   ins_encode %{
11706     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11707   %}
11708   ins_pipe(ialu_reg_mem_alu0);
11709 %}
11710 
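      // The one-operand imulq/mulq forms compute the full 128-bit product into
      // RDX:RAX; these rules return RDX, i.e. the high 64 bits of the product.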
11711 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11712 %{
11713   match(Set dst (MulHiL src rax));
11714   effect(USE_KILL rax, KILL cr);
11715 
11716   ins_cost(300);
11717   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11718   ins_encode %{
11719     __ imulq($src$$Register);
11720   %}
11721   ins_pipe(ialu_reg_reg_alu0);
11722 %}
11723 
11724 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11725 %{
11726   match(Set dst (UMulHiL src rax));
11727   effect(USE_KILL rax, KILL cr);
11728 
11729   ins_cost(300);
11730   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11731   ins_encode %{
11732     __ mulq($src$$Register);
11733   %}
11734   ins_pipe(ialu_reg_reg_alu0);
11735 %}
11736 
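      // Hardware idiv raises #DE when the quotient overflows (MIN_VALUE / -1), while
      // Java defines that case as MIN_VALUE with remainder 0, so the encodings below
      // special-case it before the sign-extend + idiv sequence.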
11737 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11738                    rFlagsReg cr)
11739 %{
11740   match(Set rax (DivI rax div));
11741   effect(KILL rdx, KILL cr);
11742 
11743   ins_cost(30*100+10*100); // XXX
11744   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11745             "jne,s   normal\n\t"
11746             "xorl    rdx, rdx\n\t"
11747             "cmpl    $div, -1\n\t"
11748             "je,s    done\n"
11749     "normal: cdql\n\t"
11750             "idivl   $div\n"
11751     "done:"        %}
11752   ins_encode(cdql_enc(div));
11753   ins_pipe(ialu_reg_reg_alu0);
11754 %}
11755 
11756 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11757                    rFlagsReg cr)
11758 %{
11759   match(Set rax (DivL rax div));
11760   effect(KILL rdx, KILL cr);
11761 
11762   ins_cost(30*100+10*100); // XXX
11763   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11764             "cmpq    rax, rdx\n\t"
11765             "jne,s   normal\n\t"
11766             "xorl    rdx, rdx\n\t"
11767             "cmpq    $div, -1\n\t"
11768             "je,s    done\n"
11769     "normal: cdqq\n\t"
11770             "idivq   $div\n"
11771     "done:"        %}
11772   ins_encode(cdqq_enc(div));
11773   ins_pipe(ialu_reg_reg_alu0);
11774 %}
11775 
11776 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11777 %{
11778   match(Set rax (UDivI rax div));
11779   effect(KILL rdx, KILL cr);
11780 
11781   ins_cost(300);
11782   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11783   ins_encode %{
11784     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11785   %}
11786   ins_pipe(ialu_reg_reg_alu0);
11787 %}
11788 
11789 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11790 %{
11791   match(Set rax (UDivL rax div));
11792   effect(KILL rdx, KILL cr);
11793 
11794   ins_cost(300);
11795   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11796   ins_encode %{
11797      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11798   %}
11799   ins_pipe(ialu_reg_reg_alu0);
11800 %}
11801 
11802 // Integer DIVMOD with Register, both quotient and mod results
11803 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11804                              rFlagsReg cr)
11805 %{
11806   match(DivModI rax div);
11807   effect(KILL cr);
11808 
11809   ins_cost(30*100+10*100); // XXX
11810   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11811             "jne,s   normal\n\t"
11812             "xorl    rdx, rdx\n\t"
11813             "cmpl    $div, -1\n\t"
11814             "je,s    done\n"
11815     "normal: cdql\n\t"
11816             "idivl   $div\n"
11817     "done:"        %}
11818   ins_encode(cdql_enc(div));
11819   ins_pipe(pipe_slow);
11820 %}
11821 
11822 // Long DIVMOD with Register, both quotient and mod results
11823 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11824                              rFlagsReg cr)
11825 %{
11826   match(DivModL rax div);
11827   effect(KILL cr);
11828 
11829   ins_cost(30*100+10*100); // XXX
11830   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11831             "cmpq    rax, rdx\n\t"
11832             "jne,s   normal\n\t"
11833             "xorl    rdx, rdx\n\t"
11834             "cmpq    $div, -1\n\t"
11835             "je,s    done\n"
11836     "normal: cdqq\n\t"
11837             "idivq   $div\n"
11838     "done:"        %}
11839   ins_encode(cdqq_enc(div));
11840   ins_pipe(pipe_slow);
11841 %}
11842 
11843 // Unsigned integer DIVMOD with Register, both quotient and mod results
11844 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11845                               no_rax_rdx_RegI div, rFlagsReg cr)
11846 %{
11847   match(UDivModI rax div);
11848   effect(TEMP tmp, KILL cr);
11849 
11850   ins_cost(300);
11851   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11852             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11853           %}
11854   ins_encode %{
11855     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11856   %}
11857   ins_pipe(pipe_slow);
11858 %}
11859 
11860 // Unsigned long DIVMOD with Register, both quotient and mod results
11861 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11862                               no_rax_rdx_RegL div, rFlagsReg cr)
11863 %{
11864   match(UDivModL rax div);
11865   effect(TEMP tmp, KILL cr);
11866 
11867   ins_cost(300);
11868   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11869             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11870           %}
11871   ins_encode %{
11872     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11873   %}
11874   ins_pipe(pipe_slow);
11875 %}
11876 
11877 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11878                    rFlagsReg cr)
11879 %{
11880   match(Set rdx (ModI rax div));
11881   effect(KILL rax, KILL cr);
11882 
11883   ins_cost(300); // XXX
11884   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11885             "jne,s   normal\n\t"
11886             "xorl    rdx, rdx\n\t"
11887             "cmpl    $div, -1\n\t"
11888             "je,s    done\n"
11889     "normal: cdql\n\t"
11890             "idivl   $div\n"
11891     "done:"        %}
11892   ins_encode(cdql_enc(div));
11893   ins_pipe(ialu_reg_reg_alu0);
11894 %}
11895 
11896 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11897                    rFlagsReg cr)
11898 %{
11899   match(Set rdx (ModL rax div));
11900   effect(KILL rax, KILL cr);
11901 
11902   ins_cost(300); // XXX
11903   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11904             "cmpq    rax, rdx\n\t"
11905             "jne,s   normal\n\t"
11906             "xorl    rdx, rdx\n\t"
11907             "cmpq    $div, -1\n\t"
11908             "je,s    done\n"
11909     "normal: cdqq\n\t"
11910             "idivq   $div\n"
11911     "done:"        %}
11912   ins_encode(cdqq_enc(div));
11913   ins_pipe(ialu_reg_reg_alu0);
11914 %}
11915 
11916 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11917 %{
11918   match(Set rdx (UModI rax div));
11919   effect(KILL rax, KILL cr);
11920 
11921   ins_cost(300);
11922   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11923   ins_encode %{
11924     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11925   %}
11926   ins_pipe(ialu_reg_reg_alu0);
11927 %}
11928 
11929 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11930 %{
11931   match(Set rdx (UModL rax div));
11932   effect(KILL rax, KILL cr);
11933 
11934   ins_cost(300);
11935   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11936   ins_encode %{
11937     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11938   %}
11939   ins_pipe(ialu_reg_reg_alu0);
11940 %}
11941 
11942 // Integer Shift Instructions
11943 // Shift Left by one, two, three
11944 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11945 %{
11946   predicate(!UseAPX);
11947   match(Set dst (LShiftI dst shift));
11948   effect(KILL cr);
11949 
11950   format %{ "sall    $dst, $shift" %}
11951   ins_encode %{
11952     __ sall($dst$$Register, $shift$$constant);
11953   %}
11954   ins_pipe(ialu_reg);
11955 %}
11956 
11957 // Shift Left by one, two, three
11958 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11959 %{
11960   predicate(UseAPX);
11961   match(Set dst (LShiftI src shift));
11962   effect(KILL cr);
11963   flag(PD::Flag_ndd_demotable_opr1);
11964 
11965   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11966   ins_encode %{
11967     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11968   %}
11969   ins_pipe(ialu_reg);
11970 %}
11971 
11972 // Shift Left by 8-bit immediate
11973 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11974 %{
11975   predicate(!UseAPX);
11976   match(Set dst (LShiftI dst shift));
11977   effect(KILL cr);
11978 
11979   format %{ "sall    $dst, $shift" %}
11980   ins_encode %{
11981     __ sall($dst$$Register, $shift$$constant);
11982   %}
11983   ins_pipe(ialu_reg);
11984 %}
11985 
11986 // Shift Left by 8-bit immediate
11987 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11988 %{
11989   predicate(UseAPX);
11990   match(Set dst (LShiftI src shift));
11991   effect(KILL cr);
11992   flag(PD::Flag_ndd_demotable_opr1);
11993 
11994   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11995   ins_encode %{
11996     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11997   %}
11998   ins_pipe(ialu_reg);
11999 %}
12000 
12001 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12002 %{
12003   predicate(UseAPX);
12004   match(Set dst (LShiftI (LoadI src) shift));
12005   effect(KILL cr);
12006 
12007   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
12008   ins_encode %{
12009     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
12010   %}
12011   ins_pipe(ialu_reg);
12012 %}
12013 
12014 // Shift Left by 8-bit immediate
12015 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12016 %{
12017   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12018   effect(KILL cr);
12019 
12020   format %{ "sall    $dst, $shift" %}
12021   ins_encode %{
12022     __ sall($dst$$Address, $shift$$constant);
12023   %}
12024   ins_pipe(ialu_mem_imm);
12025 %}
12026 
12027 // Shift Left by variable
12028 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12029 %{
12030   predicate(!VM_Version::supports_bmi2());
12031   match(Set dst (LShiftI dst shift));
12032   effect(KILL cr);
12033 
12034   format %{ "sall    $dst, $shift" %}
12035   ins_encode %{
12036     __ sall($dst$$Register);
12037   %}
12038   ins_pipe(ialu_reg_reg);
12039 %}
12040 
12041 // Shift Left by variable
12042 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12043 %{
12044   predicate(!VM_Version::supports_bmi2());
12045   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12046   effect(KILL cr);
12047 
12048   format %{ "sall    $dst, $shift" %}
12049   ins_encode %{
12050     __ sall($dst$$Address);
12051   %}
12052   ins_pipe(ialu_mem_reg);
12053 %}
12054 
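      // BMI2 shlx/sarx/shrx take the shift count in any register and do not modify
      // the flags, so these rules need neither rcx nor an rFlagsReg effect.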
12055 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12056 %{
12057   predicate(VM_Version::supports_bmi2());
12058   match(Set dst (LShiftI src shift));
12059 
12060   format %{ "shlxl   $dst, $src, $shift" %}
12061   ins_encode %{
12062     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12063   %}
12064   ins_pipe(ialu_reg_reg);
12065 %}
12066 
12067 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12068 %{
12069   predicate(VM_Version::supports_bmi2());
12070   match(Set dst (LShiftI (LoadI src) shift));
12071   ins_cost(175);
12072   format %{ "shlxl   $dst, $src, $shift" %}
12073   ins_encode %{
12074     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12075   %}
12076   ins_pipe(ialu_reg_mem);
12077 %}
12078 
12079 // Arithmetic Shift Right by 8-bit immediate
12080 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12081 %{
12082   predicate(!UseAPX);
12083   match(Set dst (RShiftI dst shift));
12084   effect(KILL cr);
12085 
12086   format %{ "sarl    $dst, $shift" %}
12087   ins_encode %{
12088     __ sarl($dst$$Register, $shift$$constant);
12089   %}
12090   ins_pipe(ialu_mem_imm);
12091 %}
12092 
12093 // Arithmetic Shift Right by 8-bit immediate
12094 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12095 %{
12096   predicate(UseAPX);
12097   match(Set dst (RShiftI src shift));
12098   effect(KILL cr);
12099   flag(PD::Flag_ndd_demotable_opr1);
12100 
12101   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12102   ins_encode %{
12103     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12104   %}
12105   ins_pipe(ialu_mem_imm);
12106 %}
12107 
12108 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12109 %{
12110   predicate(UseAPX);
12111   match(Set dst (RShiftI (LoadI src) shift));
12112   effect(KILL cr);
12113 
12114   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12115   ins_encode %{
12116     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12117   %}
12118   ins_pipe(ialu_mem_imm);
12119 %}
12120 
12121 // Arithmetic Shift Right by 8-bit immediate
12122 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12123 %{
12124   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12125   effect(KILL cr);
12126 
12127   format %{ "sarl    $dst, $shift" %}
12128   ins_encode %{
12129     __ sarl($dst$$Address, $shift$$constant);
12130   %}
12131   ins_pipe(ialu_mem_imm);
12132 %}
12133 
12134 // Arithmetic Shift Right by variable
12135 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12136 %{
12137   predicate(!VM_Version::supports_bmi2());
12138   match(Set dst (RShiftI dst shift));
12139   effect(KILL cr);
12140 
12141   format %{ "sarl    $dst, $shift" %}
12142   ins_encode %{
12143     __ sarl($dst$$Register);
12144   %}
12145   ins_pipe(ialu_reg_reg);
12146 %}
12147 
12148 // Arithmetic Shift Right by variable
12149 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12150 %{
12151   predicate(!VM_Version::supports_bmi2());
12152   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12153   effect(KILL cr);
12154 
12155   format %{ "sarl    $dst, $shift" %}
12156   ins_encode %{
12157     __ sarl($dst$$Address);
12158   %}
12159   ins_pipe(ialu_mem_reg);
12160 %}
12161 
12162 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12163 %{
12164   predicate(VM_Version::supports_bmi2());
12165   match(Set dst (RShiftI src shift));
12166 
12167   format %{ "sarxl   $dst, $src, $shift" %}
12168   ins_encode %{
12169     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12170   %}
12171   ins_pipe(ialu_reg_reg);
12172 %}
12173 
12174 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12175 %{
12176   predicate(VM_Version::supports_bmi2());
12177   match(Set dst (RShiftI (LoadI src) shift));
12178   ins_cost(175);
12179   format %{ "sarxl   $dst, $src, $shift" %}
12180   ins_encode %{
12181     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12182   %}
12183   ins_pipe(ialu_reg_mem);
12184 %}
12185 
12186 // Logical Shift Right by 8-bit immediate
12187 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12188 %{
12189   predicate(!UseAPX);
12190   match(Set dst (URShiftI dst shift));
12191   effect(KILL cr);
12192 
12193   format %{ "shrl    $dst, $shift" %}
12194   ins_encode %{
12195     __ shrl($dst$$Register, $shift$$constant);
12196   %}
12197   ins_pipe(ialu_reg);
12198 %}
12199 
12200 // Logical Shift Right by 8-bit immediate
12201 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12202 %{
12203   predicate(UseAPX);
12204   match(Set dst (URShiftI src shift));
12205   effect(KILL cr);
12206   flag(PD::Flag_ndd_demotable_opr1);
12207 
12208   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12209   ins_encode %{
12210     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12211   %}
12212   ins_pipe(ialu_reg);
12213 %}
12214 
12215 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12216 %{
12217   predicate(UseAPX);
12218   match(Set dst (URShiftI (LoadI src) shift));
12219   effect(KILL cr);
12220 
12221   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12222   ins_encode %{
12223     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12224   %}
12225   ins_pipe(ialu_reg);
12226 %}
12227 
12228 // Logical Shift Right by 8-bit immediate
12229 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12230 %{
12231   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12232   effect(KILL cr);
12233 
12234   format %{ "shrl    $dst, $shift" %}
12235   ins_encode %{
12236     __ shrl($dst$$Address, $shift$$constant);
12237   %}
12238   ins_pipe(ialu_mem_imm);
12239 %}
12240 
12241 // Logical Shift Right by variable
12242 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12243 %{
12244   predicate(!VM_Version::supports_bmi2());
12245   match(Set dst (URShiftI dst shift));
12246   effect(KILL cr);
12247 
12248   format %{ "shrl    $dst, $shift" %}
12249   ins_encode %{
12250     __ shrl($dst$$Register);
12251   %}
12252   ins_pipe(ialu_reg_reg);
12253 %}
12254 
12255 // Logical Shift Right by variable
12256 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12257 %{
12258   predicate(!VM_Version::supports_bmi2());
12259   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12260   effect(KILL cr);
12261 
12262   format %{ "shrl    $dst, $shift" %}
12263   ins_encode %{
12264     __ shrl($dst$$Address);
12265   %}
12266   ins_pipe(ialu_mem_reg);
12267 %}
12268 
12269 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12270 %{
12271   predicate(VM_Version::supports_bmi2());
12272   match(Set dst (URShiftI src shift));
12273 
12274   format %{ "shrxl   $dst, $src, $shift" %}
12275   ins_encode %{
12276     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12277   %}
12278   ins_pipe(ialu_reg_reg);
12279 %}
12280 
12281 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12282 %{
12283   predicate(VM_Version::supports_bmi2());
12284   match(Set dst (URShiftI (LoadI src) shift));
12285   ins_cost(175);
12286   format %{ "shrxl   $dst, $src, $shift" %}
12287   ins_encode %{
12288     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12289   %}
12290   ins_pipe(ialu_reg_mem);
12291 %}
12292 
12293 // Long Shift Instructions
12294 // Shift Left by one, two, three
12295 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12296 %{
12297   predicate(!UseAPX);
12298   match(Set dst (LShiftL dst shift));
12299   effect(KILL cr);
12300 
12301   format %{ "salq    $dst, $shift" %}
12302   ins_encode %{
12303     __ salq($dst$$Register, $shift$$constant);
12304   %}
12305   ins_pipe(ialu_reg);
12306 %}
12307 
12308 // Shift Left by one, two, three
12309 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12310 %{
12311   predicate(UseAPX);
12312   match(Set dst (LShiftL src shift));
12313   effect(KILL cr);
12314   flag(PD::Flag_ndd_demotable_opr1);
12315 
12316   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12317   ins_encode %{
12318     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12319   %}
12320   ins_pipe(ialu_reg);
12321 %}
12322 
12323 // Shift Left by 8-bit immediate
12324 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12325 %{
12326   predicate(!UseAPX);
12327   match(Set dst (LShiftL dst shift));
12328   effect(KILL cr);
12329 
12330   format %{ "salq    $dst, $shift" %}
12331   ins_encode %{
12332     __ salq($dst$$Register, $shift$$constant);
12333   %}
12334   ins_pipe(ialu_reg);
12335 %}
12336 
12337 // Shift Left by 8-bit immediate
12338 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12339 %{
12340   predicate(UseAPX);
12341   match(Set dst (LShiftL src shift));
12342   effect(KILL cr);
12343   flag(PD::Flag_ndd_demotable_opr1);
12344 
12345   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12346   ins_encode %{
12347     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12348   %}
12349   ins_pipe(ialu_reg);
12350 %}
12351 
12352 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12353 %{
12354   predicate(UseAPX);
12355   match(Set dst (LShiftL (LoadL src) shift));
12356   effect(KILL cr);
12357 
12358   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12359   ins_encode %{
12360     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12361   %}
12362   ins_pipe(ialu_reg);
12363 %}
12364 
12365 // Shift Left by 8-bit immediate
12366 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12367 %{
12368   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12369   effect(KILL cr);
12370 
12371   format %{ "salq    $dst, $shift" %}
12372   ins_encode %{
12373     __ salq($dst$$Address, $shift$$constant);
12374   %}
12375   ins_pipe(ialu_mem_imm);
12376 %}
12377 
12378 // Shift Left by variable
12379 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12380 %{
12381   predicate(!VM_Version::supports_bmi2());
12382   match(Set dst (LShiftL dst shift));
12383   effect(KILL cr);
12384 
12385   format %{ "salq    $dst, $shift" %}
12386   ins_encode %{
12387     __ salq($dst$$Register);
12388   %}
12389   ins_pipe(ialu_reg_reg);
12390 %}
12391 
12392 // Shift Left by variable
12393 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12394 %{
12395   predicate(!VM_Version::supports_bmi2());
12396   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12397   effect(KILL cr);
12398 
12399   format %{ "salq    $dst, $shift" %}
12400   ins_encode %{
12401     __ salq($dst$$Address);
12402   %}
12403   ins_pipe(ialu_mem_reg);
12404 %}
12405 
12406 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12407 %{
12408   predicate(VM_Version::supports_bmi2());
12409   match(Set dst (LShiftL src shift));
12410 
12411   format %{ "shlxq   $dst, $src, $shift" %}
12412   ins_encode %{
12413     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12414   %}
12415   ins_pipe(ialu_reg_reg);
12416 %}
12417 
12418 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12419 %{
12420   predicate(VM_Version::supports_bmi2());
12421   match(Set dst (LShiftL (LoadL src) shift));
12422   ins_cost(175);
12423   format %{ "shlxq   $dst, $src, $shift" %}
12424   ins_encode %{
12425     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12426   %}
12427   ins_pipe(ialu_reg_mem);
12428 %}
12429 
12430 // Arithmetic Shift Right by 8-bit immediate
12431 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12432 %{
12433   predicate(!UseAPX);
12434   match(Set dst (RShiftL dst shift));
12435   effect(KILL cr);
12436 
12437   format %{ "sarq    $dst, $shift" %}
12438   ins_encode %{
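    // The shift operand is a plain immI, so mask the count down to the 6 bits
    // the hardware actually uses for 64-bit shifts.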
12439     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12440   %}
12441   ins_pipe(ialu_mem_imm);
12442 %}
12443 
12444 // Arithmetic Shift Right by 8-bit immediate
12445 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12446 %{
12447   predicate(UseAPX);
12448   match(Set dst (RShiftL src shift));
12449   effect(KILL cr);
12450   flag(PD::Flag_ndd_demotable_opr1);
12451 
12452   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12453   ins_encode %{
12454     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12455   %}
12456   ins_pipe(ialu_mem_imm);
12457 %}
12458 
12459 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12460 %{
12461   predicate(UseAPX);
12462   match(Set dst (RShiftL (LoadL src) shift));
12463   effect(KILL cr);
12464 
12465   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12466   ins_encode %{
12467     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12468   %}
12469   ins_pipe(ialu_mem_imm);
12470 %}
12471 
12472 // Arithmetic Shift Right by 8-bit immediate
12473 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12474 %{
12475   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12476   effect(KILL cr);
12477 
12478   format %{ "sarq    $dst, $shift" %}
12479   ins_encode %{
12480     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12481   %}
12482   ins_pipe(ialu_mem_imm);
12483 %}
12484 
12485 // Arithmetic Shift Right by variable
12486 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12487 %{
12488   predicate(!VM_Version::supports_bmi2());
12489   match(Set dst (RShiftL dst shift));
12490   effect(KILL cr);
12491 
12492   format %{ "sarq    $dst, $shift" %}
12493   ins_encode %{
12494     __ sarq($dst$$Register);
12495   %}
12496   ins_pipe(ialu_reg_reg);
12497 %}
12498 
12499 // Arithmetic Shift Right by variable
12500 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12501 %{
12502   predicate(!VM_Version::supports_bmi2());
12503   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12504   effect(KILL cr);
12505 
12506   format %{ "sarq    $dst, $shift" %}
12507   ins_encode %{
12508     __ sarq($dst$$Address);
12509   %}
12510   ins_pipe(ialu_mem_reg);
12511 %}
12512 
12513 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12514 %{
12515   predicate(VM_Version::supports_bmi2());
12516   match(Set dst (RShiftL src shift));
12517 
12518   format %{ "sarxq   $dst, $src, $shift" %}
12519   ins_encode %{
12520     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12521   %}
12522   ins_pipe(ialu_reg_reg);
12523 %}
12524 
12525 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12526 %{
12527   predicate(VM_Version::supports_bmi2());
12528   match(Set dst (RShiftL (LoadL src) shift));
12529   ins_cost(175);
12530   format %{ "sarxq   $dst, $src, $shift" %}
12531   ins_encode %{
12532     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12533   %}
12534   ins_pipe(ialu_reg_mem);
12535 %}
12536 
12537 // Logical Shift Right by 8-bit immediate
12538 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12539 %{
12540   predicate(!UseAPX);
12541   match(Set dst (URShiftL dst shift));
12542   effect(KILL cr);
12543 
12544   format %{ "shrq    $dst, $shift" %}
12545   ins_encode %{
12546     __ shrq($dst$$Register, $shift$$constant);
12547   %}
12548   ins_pipe(ialu_reg);
12549 %}
12550 
12551 // Logical Shift Right by 8-bit immediate
12552 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12553 %{
12554   predicate(UseAPX);
12555   match(Set dst (URShiftL src shift));
12556   effect(KILL cr);
12557   flag(PD::Flag_ndd_demotable_opr1);
12558 
12559   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12560   ins_encode %{
12561     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12562   %}
12563   ins_pipe(ialu_reg);
12564 %}
12565 
12566 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12567 %{
12568   predicate(UseAPX);
12569   match(Set dst (URShiftL (LoadL src) shift));
12570   effect(KILL cr);
12571 
12572   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12573   ins_encode %{
12574     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12575   %}
12576   ins_pipe(ialu_reg);
12577 %}
12578 
12579 // Logical Shift Right by 8-bit immediate
12580 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12581 %{
12582   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12583   effect(KILL cr);
12584 
12585   format %{ "shrq    $dst, $shift" %}
12586   ins_encode %{
12587     __ shrq($dst$$Address, $shift$$constant);
12588   %}
12589   ins_pipe(ialu_mem_imm);
12590 %}
12591 
12592 // Logical Shift Right by variable
12593 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12594 %{
12595   predicate(!VM_Version::supports_bmi2());
12596   match(Set dst (URShiftL dst shift));
12597   effect(KILL cr);
12598 
12599   format %{ "shrq    $dst, $shift" %}
12600   ins_encode %{
12601     __ shrq($dst$$Register);
12602   %}
12603   ins_pipe(ialu_reg_reg);
12604 %}
12605 
12606 // Logical Shift Right by variable
12607 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12608 %{
12609   predicate(!VM_Version::supports_bmi2());
12610   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12611   effect(KILL cr);
12612 
12613   format %{ "shrq    $dst, $shift" %}
12614   ins_encode %{
12615     __ shrq($dst$$Address);
12616   %}
12617   ins_pipe(ialu_mem_reg);
12618 %}
12619 
12620 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12621 %{
12622   predicate(VM_Version::supports_bmi2());
12623   match(Set dst (URShiftL src shift));
12624 
12625   format %{ "shrxq   $dst, $src, $shift" %}
12626   ins_encode %{
12627     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12628   %}
12629   ins_pipe(ialu_reg_reg);
12630 %}
12631 
12632 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12633 %{
12634   predicate(VM_Version::supports_bmi2());
12635   match(Set dst (URShiftL (LoadL src) shift));
12636   ins_cost(175);
12637   format %{ "shrxq   $dst, $src, $shift" %}
12638   ins_encode %{
12639     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12640   %}
12641   ins_pipe(ialu_reg_mem);
12642 %}
12643 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12646 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12647 %{
12648   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12649 
12650   format %{ "movsbl  $dst, $src\t# i2b" %}
12651   ins_encode %{
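    // movsbl sign-extends the low byte, collapsing the shl/sar pair into one instruction.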
12652     __ movsbl($dst$$Register, $src$$Register);
12653   %}
12654   ins_pipe(ialu_reg_reg);
12655 %}
12656 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12659 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12660 %{
12661   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12662 
12663   format %{ "movswl  $dst, $src\t# i2s" %}
12664   ins_encode %{
12665     __ movswl($dst$$Register, $src$$Register);
12666   %}
12667   ins_pipe(ialu_reg_reg);
12668 %}
12669 
12670 // ROL/ROR instructions
12671 
12672 // Rotate left by constant.
12673 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12674 %{
12675   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12676   match(Set dst (RotateLeft dst shift));
12677   effect(KILL cr);
12678   format %{ "roll    $dst, $shift" %}
12679   ins_encode %{
12680     __ roll($dst$$Register, $shift$$constant);
12681   %}
12682   ins_pipe(ialu_reg);
12683 %}
12684 
12685 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12686 %{
12687   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12688   match(Set dst (RotateLeft src shift));
12689   format %{ "rolxl   $dst, $src, $shift" %}
12690   ins_encode %{
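    // There is no rolx instruction; rotate left by n is expressed as BMI2 rorxl
    // by (32 - n), which also leaves the flags untouched (hence no KILL cr).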
12691     int shift = 32 - ($shift$$constant & 31);
12692     __ rorxl($dst$$Register, $src$$Register, shift);
12693   %}
12694   ins_pipe(ialu_reg_reg);
12695 %}
12696 
12697 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12698 %{
12699   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12700   match(Set dst (RotateLeft (LoadI src) shift));
12701   ins_cost(175);
12702   format %{ "rolxl   $dst, $src, $shift" %}
12703   ins_encode %{
12704     int shift = 32 - ($shift$$constant & 31);
12705     __ rorxl($dst$$Register, $src$$Address, shift);
12706   %}
12707   ins_pipe(ialu_reg_mem);
12708 %}
12709 
12710 // Rotate Left by variable
12711 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12712 %{
12713   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12714   match(Set dst (RotateLeft dst shift));
12715   effect(KILL cr);
12716   format %{ "roll    $dst, $shift" %}
12717   ins_encode %{
12718     __ roll($dst$$Register);
12719   %}
12720   ins_pipe(ialu_reg_reg);
12721 %}
12722 
12723 // Rotate Left by variable
12724 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12725 %{
12726   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12727   match(Set dst (RotateLeft src shift));
12728   effect(KILL cr);
12729   flag(PD::Flag_ndd_demotable_opr1);
12730 
12731   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12732   ins_encode %{
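    // The rotate count is implicit in CL (bound via rcx_RegI), so only dst and src are passed.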
12733     __ eroll($dst$$Register, $src$$Register, false);
12734   %}
12735   ins_pipe(ialu_reg_reg);
12736 %}
12737 
12738 // Rotate Right by constant.
12739 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12740 %{
12741   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12742   match(Set dst (RotateRight dst shift));
12743   effect(KILL cr);
12744   format %{ "rorl    $dst, $shift" %}
12745   ins_encode %{
12746     __ rorl($dst$$Register, $shift$$constant);
12747   %}
12748   ins_pipe(ialu_reg);
12749 %}
12750 
12751 // Rotate Right by constant.
12752 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12753 %{
12754   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12755   match(Set dst (RotateRight src shift));
12756   format %{ "rorxl   $dst, $src, $shift" %}
12757   ins_encode %{
12758     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12759   %}
12760   ins_pipe(ialu_reg_reg);
12761 %}
12762 
12763 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12764 %{
12765   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12766   match(Set dst (RotateRight (LoadI src) shift));
12767   ins_cost(175);
12768   format %{ "rorxl   $dst, $src, $shift" %}
12769   ins_encode %{
12770     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12771   %}
12772   ins_pipe(ialu_reg_mem);
12773 %}
12774 
12775 // Rotate Right by variable
12776 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12777 %{
12778   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12779   match(Set dst (RotateRight dst shift));
12780   effect(KILL cr);
12781   format %{ "rorl    $dst, $shift" %}
12782   ins_encode %{
12783     __ rorl($dst$$Register);
12784   %}
12785   ins_pipe(ialu_reg_reg);
12786 %}
12787 
12788 // Rotate Right by variable
12789 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12790 %{
12791   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12792   match(Set dst (RotateRight src shift));
12793   effect(KILL cr);
12794   flag(PD::Flag_ndd_demotable_opr1);
12795 
12796   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12797   ins_encode %{
12798     __ erorl($dst$$Register, $src$$Register, false);
12799   %}
12800   ins_pipe(ialu_reg_reg);
12801 %}
12802 
12803 // Rotate Left by constant.
12804 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12805 %{
12806   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12807   match(Set dst (RotateLeft dst shift));
12808   effect(KILL cr);
12809   format %{ "rolq    $dst, $shift" %}
12810   ins_encode %{
12811     __ rolq($dst$$Register, $shift$$constant);
12812   %}
12813   ins_pipe(ialu_reg);
12814 %}
12815 
12816 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12817 %{
12818   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12819   match(Set dst (RotateLeft src shift));
12820   format %{ "rolxq   $dst, $src, $shift" %}
12821   ins_encode %{
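    // Same rorx trick as the 32-bit case: rotate left by n equals rotate right by (64 - n).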
12822     int shift = 64 - ($shift$$constant & 63);
12823     __ rorxq($dst$$Register, $src$$Register, shift);
12824   %}
12825   ins_pipe(ialu_reg_reg);
12826 %}
12827 
12828 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12829 %{
12830   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12831   match(Set dst (RotateLeft (LoadL src) shift));
12832   ins_cost(175);
12833   format %{ "rolxq   $dst, $src, $shift" %}
12834   ins_encode %{
12835     int shift = 64 - ($shift$$constant & 63);
12836     __ rorxq($dst$$Register, $src$$Address, shift);
12837   %}
12838   ins_pipe(ialu_reg_mem);
12839 %}
12840 
12841 // Rotate Left by variable
12842 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12843 %{
12844   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12845   match(Set dst (RotateLeft dst shift));
12846   effect(KILL cr);
12847 
12848   format %{ "rolq    $dst, $shift" %}
12849   ins_encode %{
12850     __ rolq($dst$$Register);
12851   %}
12852   ins_pipe(ialu_reg_reg);
12853 %}
12854 
12855 // Rotate Left by variable
12856 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12857 %{
12858   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12859   match(Set dst (RotateLeft src shift));
12860   effect(KILL cr);
12861   flag(PD::Flag_ndd_demotable_opr1);
12862 
12863   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12864   ins_encode %{
12865     __ erolq($dst$$Register, $src$$Register, false);
12866   %}
12867   ins_pipe(ialu_reg_reg);
12868 %}
12869 
12870 // Rotate Right by constant.
12871 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12872 %{
12873   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12874   match(Set dst (RotateRight dst shift));
12875   effect(KILL cr);
12876   format %{ "rorq    $dst, $shift" %}
12877   ins_encode %{
12878     __ rorq($dst$$Register, $shift$$constant);
12879   %}
12880   ins_pipe(ialu_reg);
12881 %}
12882 
12883 // Rotate Right by constant
12884 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12885 %{
12886   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12887   match(Set dst (RotateRight src shift));
12888   format %{ "rorxq   $dst, $src, $shift" %}
12889   ins_encode %{
12890     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12891   %}
12892   ins_pipe(ialu_reg_reg);
12893 %}
12894 
12895 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12896 %{
12897   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12898   match(Set dst (RotateRight (LoadL src) shift));
12899   ins_cost(175);
12900   format %{ "rorxq   $dst, $src, $shift" %}
12901   ins_encode %{
12902     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12903   %}
12904   ins_pipe(ialu_reg_mem);
12905 %}
12906 
12907 // Rotate Right by variable
12908 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12909 %{
12910   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12911   match(Set dst (RotateRight dst shift));
12912   effect(KILL cr);
12913   format %{ "rorq    $dst, $shift" %}
12914   ins_encode %{
12915     __ rorq($dst$$Register);
12916   %}
12917   ins_pipe(ialu_reg_reg);
12918 %}
12919 
12920 // Rotate Right by variable
12921 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12922 %{
12923   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12924   match(Set dst (RotateRight src shift));
12925   effect(KILL cr);
12926   flag(PD::Flag_ndd_demotable_opr1);
12927 
12928   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12929   ins_encode %{
12930     __ erorq($dst$$Register, $src$$Register, false);
12931   %}
12932   ins_pipe(ialu_reg_reg);
12933 %}
12934 
12935 //----------------------------- CompressBits/ExpandBits ------------------------
12936 
12937 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12938   predicate(n->bottom_type()->isa_long());
12939   match(Set dst (CompressBits src mask));
12940   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12941   ins_encode %{
12942     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12943   %}
12944   ins_pipe( pipe_slow );
12945 %}
12946 
12947 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12948   predicate(n->bottom_type()->isa_long());
12949   match(Set dst (ExpandBits src mask));
12950   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12951   ins_encode %{
12952     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12953   %}
12954   ins_pipe( pipe_slow );
12955 %}
12956 
12957 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12958   predicate(n->bottom_type()->isa_long());
12959   match(Set dst (CompressBits src (LoadL mask)));
12960   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12961   ins_encode %{
12962     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12963   %}
12964   ins_pipe( pipe_slow );
12965 %}
12966 
12967 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12968   predicate(n->bottom_type()->isa_long());
12969   match(Set dst (ExpandBits src (LoadL mask)));
12970   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12971   ins_encode %{
12972     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12973   %}
12974   ins_pipe( pipe_slow );
12975 %}
12976 
12977 
12978 // Logical Instructions
12979 
12980 // Integer Logical Instructions
12981 
12982 // And Instructions
12983 // And Register with Register
12984 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12985 %{
12986   predicate(!UseAPX);
12987   match(Set dst (AndI dst src));
12988   effect(KILL cr);
12989   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12990 
12991   format %{ "andl    $dst, $src\t# int" %}
12992   ins_encode %{
12993     __ andl($dst$$Register, $src$$Register);
12994   %}
12995   ins_pipe(ialu_reg_reg);
12996 %}
12997 
12998 // And Register with Register using New Data Destination (NDD)
12999 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13000 %{
13001   predicate(UseAPX);
13002   match(Set dst (AndI src1 src2));
13003   effect(KILL cr);
13004   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13005 
13006   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
13007   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13011   ins_pipe(ialu_reg_reg);
13012 %}
13013 
13014 // And Register with Immediate 255
13015 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
13016 %{
13017   match(Set dst (AndI src mask));
13018 
13019   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
13020   ins_encode %{
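    // movzbl zero-extends the low byte, which is exactly an AND with 0xFF.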
13021     __ movzbl($dst$$Register, $src$$Register);
13022   %}
13023   ins_pipe(ialu_reg);
13024 %}
13025 
13026 // And Register with Immediate 255 and promote to long
13027 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13028 %{
13029   match(Set dst (ConvI2L (AndI src mask)));
13030 
13031   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13032   ins_encode %{
13033     __ movzbl($dst$$Register, $src$$Register);
13034   %}
13035   ins_pipe(ialu_reg);
13036 %}
13037 
13038 // And Register with Immediate 65535
13039 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13040 %{
13041   match(Set dst (AndI src mask));
13042 
13043   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13044   ins_encode %{
13045     __ movzwl($dst$$Register, $src$$Register);
13046   %}
13047   ins_pipe(ialu_reg);
13048 %}
13049 
13050 // And Register with Immediate 65535 and promote to long
13051 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13052 %{
13053   match(Set dst (ConvI2L (AndI src mask)));
13054 
13055   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13056   ins_encode %{
13057     __ movzwl($dst$$Register, $src$$Register);
13058   %}
13059   ins_pipe(ialu_reg);
13060 %}
13061 
13062 // Can skip int2long conversions after AND with small bitmask
13063 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13064 %{
13065   predicate(VM_Version::supports_bmi2());
13066   ins_cost(125);
13067   effect(TEMP tmp, KILL cr);
13068   match(Set dst (ConvI2L (AndI src mask)));
13069   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
13070   ins_encode %{
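    // exact_log2($mask + 1) is the number of low bits to keep; bzhiq zeroes every bit
    // from that index upward, so the result is already zero-extended to 64 bits and no
    // separate i2l is needed.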
13071     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13072     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13073   %}
13074   ins_pipe(ialu_reg_reg);
13075 %}
13076 
13077 // And Register with Immediate
13078 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13079 %{
13080   predicate(!UseAPX);
13081   match(Set dst (AndI dst src));
13082   effect(KILL cr);
13083   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13084 
13085   format %{ "andl    $dst, $src\t# int" %}
13086   ins_encode %{
13087     __ andl($dst$$Register, $src$$constant);
13088   %}
13089   ins_pipe(ialu_reg);
13090 %}
13091 
13092 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13093 %{
13094   predicate(UseAPX);
13095   match(Set dst (AndI src1 src2));
13096   effect(KILL cr);
13097   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13098 
13099   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13100   ins_encode %{
13101     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13102   %}
13103   ins_pipe(ialu_reg);
13104 %}
13105 
13106 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13107 %{
13108   predicate(UseAPX);
13109   match(Set dst (AndI (LoadI src1) src2));
13110   effect(KILL cr);
13111   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13112 
13113   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13114   ins_encode %{
13115     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13116   %}
13117   ins_pipe(ialu_reg);
13118 %}
13119 
13120 // And Register with Memory
13121 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13122 %{
13123   predicate(!UseAPX);
13124   match(Set dst (AndI dst (LoadI src)));
13125   effect(KILL cr);
13126   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13127 
13128   ins_cost(150);
13129   format %{ "andl    $dst, $src\t# int" %}
13130   ins_encode %{
13131     __ andl($dst$$Register, $src$$Address);
13132   %}
13133   ins_pipe(ialu_reg_mem);
13134 %}
13135 
13136 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13137 %{
13138   predicate(UseAPX);
13139   match(Set dst (AndI src1 (LoadI src2)));
13140   effect(KILL cr);
13141   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13142 
13143   ins_cost(150);
13144   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13145   ins_encode %{
13146     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13147   %}
13148   ins_pipe(ialu_reg_mem);
13149 %}
13150 
13151 // And Memory with Register
13152 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13153 %{
13154   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13155   effect(KILL cr);
13156   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13157 
13158   ins_cost(150);
13159   format %{ "andb    $dst, $src\t# byte" %}
13160   ins_encode %{
13161     __ andb($dst$$Address, $src$$Register);
13162   %}
13163   ins_pipe(ialu_mem_reg);
13164 %}
13165 
13166 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13167 %{
13168   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13169   effect(KILL cr);
13170   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13171 
13172   ins_cost(150);
13173   format %{ "andl    $dst, $src\t# int" %}
13174   ins_encode %{
13175     __ andl($dst$$Address, $src$$Register);
13176   %}
13177   ins_pipe(ialu_mem_reg);
13178 %}
13179 
13180 // And Memory with Immediate
13181 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13182 %{
13183   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13184   effect(KILL cr);
13185   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13186 
13187   ins_cost(125);
13188   format %{ "andl    $dst, $src\t# int" %}
13189   ins_encode %{
13190     __ andl($dst$$Address, $src$$constant);
13191   %}
13192   ins_pipe(ialu_mem_imm);
13193 %}
13194 
13195 // BMI1 instructions
13196 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13197   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13198   predicate(UseBMI1Instructions);
13199   effect(KILL cr);
13200   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13201 
13202   ins_cost(125);
13203   format %{ "andnl  $dst, $src1, $src2" %}
13204 
13205   ins_encode %{
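    // BMI1 andn computes ~src1 & src2 in a single instruction, covering the
    // (AndI (XorI src1 -1) ...) pattern directly.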
13206     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13207   %}
13208   ins_pipe(ialu_reg_mem);
13209 %}
13210 
13211 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13212   match(Set dst (AndI (XorI src1 minus_1) src2));
13213   predicate(UseBMI1Instructions);
13214   effect(KILL cr);
13215   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13216 
13217   format %{ "andnl  $dst, $src1, $src2" %}
13218 
13219   ins_encode %{
13220     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13221   %}
13222   ins_pipe(ialu_reg);
13223 %}
13224 
13225 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13226   match(Set dst (AndI (SubI imm_zero src) src));
13227   predicate(UseBMI1Instructions);
13228   effect(KILL cr);
13229   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13230 
13231   format %{ "blsil  $dst, $src" %}
13232 
13233   ins_encode %{
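    // BMI1 blsi isolates the lowest set bit: dst = (-src) & src.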
13234     __ blsil($dst$$Register, $src$$Register);
13235   %}
13236   ins_pipe(ialu_reg);
13237 %}
13238 
13239 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13240   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13241   predicate(UseBMI1Instructions);
13242   effect(KILL cr);
13243   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13244 
13245   ins_cost(125);
13246   format %{ "blsil  $dst, $src" %}
13247 
13248   ins_encode %{
13249     __ blsil($dst$$Register, $src$$Address);
13250   %}
13251   ins_pipe(ialu_reg_mem);
13252 %}
13253 
13254 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13255 %{
13256   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13257   predicate(UseBMI1Instructions);
13258   effect(KILL cr);
13259   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13260 
13261   ins_cost(125);
13262   format %{ "blsmskl $dst, $src" %}
13263 
13264   ins_encode %{
13265     __ blsmskl($dst$$Register, $src$$Address);
13266   %}
13267   ins_pipe(ialu_reg_mem);
13268 %}
13269 
13270 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13271 %{
13272   match(Set dst (XorI (AddI src minus_1) src));
13273   predicate(UseBMI1Instructions);
13274   effect(KILL cr);
13275   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13276 
13277   format %{ "blsmskl $dst, $src" %}
13278 
13279   ins_encode %{
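    // BMI1 blsmsk builds a mask up to and including the lowest set bit: dst = (src - 1) ^ src.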
13280     __ blsmskl($dst$$Register, $src$$Register);
13281   %}
13282 
13283   ins_pipe(ialu_reg);
13284 %}
13285 
13286 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13287 %{
13288   match(Set dst (AndI (AddI src minus_1) src) );
13289   predicate(UseBMI1Instructions);
13290   effect(KILL cr);
13291   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13292 
13293   format %{ "blsrl  $dst, $src" %}
13294 
13295   ins_encode %{
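    // BMI1 blsr clears the lowest set bit: dst = (src - 1) & src.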
13296     __ blsrl($dst$$Register, $src$$Register);
13297   %}
13298 
  ins_pipe(ialu_reg);
13300 %}
13301 
13302 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13303 %{
13304   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13305   predicate(UseBMI1Instructions);
13306   effect(KILL cr);
13307   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13308 
13309   ins_cost(125);
13310   format %{ "blsrl  $dst, $src" %}
13311 
13312   ins_encode %{
13313     __ blsrl($dst$$Register, $src$$Address);
13314   %}
13315 
  ins_pipe(ialu_reg_mem);
13317 %}
13318 
13319 // Or Instructions
13320 // Or Register with Register
13321 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13322 %{
13323   predicate(!UseAPX);
13324   match(Set dst (OrI dst src));
13325   effect(KILL cr);
13326   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13327 
13328   format %{ "orl     $dst, $src\t# int" %}
13329   ins_encode %{
13330     __ orl($dst$$Register, $src$$Register);
13331   %}
13332   ins_pipe(ialu_reg_reg);
13333 %}
13334 
13335 // Or Register with Register using New Data Destination (NDD)
13336 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13337 %{
13338   predicate(UseAPX);
13339   match(Set dst (OrI src1 src2));
13340   effect(KILL cr);
13341   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13342 
13343   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13344   ins_encode %{
13345     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13346   %}
13347   ins_pipe(ialu_reg_reg);
13348 %}
13349 
13350 // Or Register with Immediate
13351 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13352 %{
13353   predicate(!UseAPX);
13354   match(Set dst (OrI dst src));
13355   effect(KILL cr);
13356   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13357 
13358   format %{ "orl     $dst, $src\t# int" %}
13359   ins_encode %{
13360     __ orl($dst$$Register, $src$$constant);
13361   %}
13362   ins_pipe(ialu_reg);
13363 %}
13364 
13365 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13366 %{
13367   predicate(UseAPX);
13368   match(Set dst (OrI src1 src2));
13369   effect(KILL cr);
13370   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13371 
13372   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13373   ins_encode %{
13374     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13375   %}
13376   ins_pipe(ialu_reg);
13377 %}
13378 
13379 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13380 %{
13381   predicate(UseAPX);
13382   match(Set dst (OrI src1 src2));
13383   effect(KILL cr);
13384   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13385 
13386   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13387   ins_encode %{
13388     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13389   %}
13390   ins_pipe(ialu_reg);
13391 %}
13392 
13393 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13394 %{
13395   predicate(UseAPX);
13396   match(Set dst (OrI (LoadI src1) src2));
13397   effect(KILL cr);
13398   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13399 
13400   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13401   ins_encode %{
13402     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13403   %}
13404   ins_pipe(ialu_reg);
13405 %}
13406 
13407 // Or Register with Memory
13408 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13409 %{
13410   predicate(!UseAPX);
13411   match(Set dst (OrI dst (LoadI src)));
13412   effect(KILL cr);
13413   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13414 
13415   ins_cost(150);
13416   format %{ "orl     $dst, $src\t# int" %}
13417   ins_encode %{
13418     __ orl($dst$$Register, $src$$Address);
13419   %}
13420   ins_pipe(ialu_reg_mem);
13421 %}
13422 
13423 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13424 %{
13425   predicate(UseAPX);
13426   match(Set dst (OrI src1 (LoadI src2)));
13427   effect(KILL cr);
13428   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13429 
13430   ins_cost(150);
13431   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13432   ins_encode %{
13433     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13434   %}
13435   ins_pipe(ialu_reg_mem);
13436 %}
13437 
13438 // Or Memory with Register
13439 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13440 %{
13441   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13442   effect(KILL cr);
13443   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13444 
13445   ins_cost(150);
13446   format %{ "orb    $dst, $src\t# byte" %}
13447   ins_encode %{
13448     __ orb($dst$$Address, $src$$Register);
13449   %}
13450   ins_pipe(ialu_mem_reg);
13451 %}
13452 
13453 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13454 %{
13455   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13456   effect(KILL cr);
13457   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13458 
13459   ins_cost(150);
13460   format %{ "orl     $dst, $src\t# int" %}
13461   ins_encode %{
13462     __ orl($dst$$Address, $src$$Register);
13463   %}
13464   ins_pipe(ialu_mem_reg);
13465 %}
13466 
13467 // Or Memory with Immediate
13468 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13469 %{
13470   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13471   effect(KILL cr);
13472   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13473 
13474   ins_cost(125);
13475   format %{ "orl     $dst, $src\t# int" %}
13476   ins_encode %{
13477     __ orl($dst$$Address, $src$$constant);
13478   %}
13479   ins_pipe(ialu_mem_imm);
13480 %}
13481 
13482 // Xor Instructions
13483 // Xor Register with Register
13484 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13485 %{
13486   predicate(!UseAPX);
13487   match(Set dst (XorI dst src));
13488   effect(KILL cr);
13489   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13490 
13491   format %{ "xorl    $dst, $src\t# int" %}
13492   ins_encode %{
13493     __ xorl($dst$$Register, $src$$Register);
13494   %}
13495   ins_pipe(ialu_reg_reg);
13496 %}
13497 
13498 // Xor Register with Register using New Data Destination (NDD)
13499 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13500 %{
13501   predicate(UseAPX);
13502   match(Set dst (XorI src1 src2));
13503   effect(KILL cr);
13504   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13505 
13506   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13507   ins_encode %{
13508     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13509   %}
13510   ins_pipe(ialu_reg_reg);
13511 %}
13512 
13513 // Xor Register with Immediate -1
13514 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13515 %{
13516   predicate(!UseAPX);
13517   match(Set dst (XorI dst imm));
13518 
13519   format %{ "notl    $dst" %}
13520   ins_encode %{
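    // XOR with -1 is a bitwise NOT; notl does not modify the flags, so no KILL cr is needed.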
13521      __ notl($dst$$Register);
13522   %}
13523   ins_pipe(ialu_reg);
13524 %}
13525 
13526 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13527 %{
13528   match(Set dst (XorI src imm));
13529   predicate(UseAPX);
13530   flag(PD::Flag_ndd_demotable_opr1);
13531 
13532   format %{ "enotl    $dst, $src" %}
13533   ins_encode %{
13534      __ enotl($dst$$Register, $src$$Register);
13535   %}
13536   ins_pipe(ialu_reg);
13537 %}
13538 
13539 // Xor Register with Immediate
13540 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13541 %{
13542   // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
13543   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13544   match(Set dst (XorI dst src));
13545   effect(KILL cr);
13546   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13547 
13548   format %{ "xorl    $dst, $src\t# int" %}
13549   ins_encode %{
13550     __ xorl($dst$$Register, $src$$constant);
13551   %}
13552   ins_pipe(ialu_reg);
13553 %}
13554 
13555 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13556 %{
13557   // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
13558   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13559   match(Set dst (XorI src1 src2));
13560   effect(KILL cr);
13561   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13562 
13563   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13564   ins_encode %{
13565     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13566   %}
13567   ins_pipe(ialu_reg);
13568 %}
13569 
13570 // Xor Memory with Immediate
13571 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13572 %{
13573   predicate(UseAPX);
13574   match(Set dst (XorI (LoadI src1) src2));
13575   effect(KILL cr);
13576   ins_cost(150);
13577   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13578 
13579   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13580   ins_encode %{
13581     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13582   %}
13583   ins_pipe(ialu_reg);
13584 %}
13585 
13586 // Xor Register with Memory
13587 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13588 %{
13589   predicate(!UseAPX);
13590   match(Set dst (XorI dst (LoadI src)));
13591   effect(KILL cr);
13592   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13593 
13594   ins_cost(150);
13595   format %{ "xorl    $dst, $src\t# int" %}
13596   ins_encode %{
13597     __ xorl($dst$$Register, $src$$Address);
13598   %}
13599   ins_pipe(ialu_reg_mem);
13600 %}
13601 
13602 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13603 %{
13604   predicate(UseAPX);
13605   match(Set dst (XorI src1 (LoadI src2)));
13606   effect(KILL cr);
13607   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13608 
13609   ins_cost(150);
13610   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13611   ins_encode %{
13612     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13613   %}
13614   ins_pipe(ialu_reg_mem);
13615 %}
13616 
13617 // Xor Memory with Register
13618 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13619 %{
13620   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13621   effect(KILL cr);
13622   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13623 
13624   ins_cost(150);
13625   format %{ "xorb    $dst, $src\t# byte" %}
13626   ins_encode %{
13627     __ xorb($dst$$Address, $src$$Register);
13628   %}
13629   ins_pipe(ialu_mem_reg);
13630 %}
13631 
13632 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13633 %{
13634   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13635   effect(KILL cr);
13636   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13637 
13638   ins_cost(150);
13639   format %{ "xorl    $dst, $src\t# int" %}
13640   ins_encode %{
13641     __ xorl($dst$$Address, $src$$Register);
13642   %}
13643   ins_pipe(ialu_mem_reg);
13644 %}
13645 
13646 // Xor Memory with Immediate
13647 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13648 %{
13649   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13650   effect(KILL cr);
13651   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13652 
13653   ins_cost(125);
13654   format %{ "xorl    $dst, $src\t# int" %}
13655   ins_encode %{
13656     __ xorl($dst$$Address, $src$$constant);
13657   %}
13658   ins_pipe(ialu_mem_imm);
13659 %}
13660 
13661 
13662 // Long Logical Instructions
13663 
13664 // And Instructions
13665 // And Register with Register
13666 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13667 %{
13668   predicate(!UseAPX);
13669   match(Set dst (AndL dst src));
13670   effect(KILL cr);
13671   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13672 
13673   format %{ "andq    $dst, $src\t# long" %}
13674   ins_encode %{
13675     __ andq($dst$$Register, $src$$Register);
13676   %}
13677   ins_pipe(ialu_reg_reg);
13678 %}
13679 
13680 // And Register with Register using New Data Destination (NDD)
13681 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13682 %{
13683   predicate(UseAPX);
13684   match(Set dst (AndL src1 src2));
13685   effect(KILL cr);
13686   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13687 
13688   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13689   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13693   ins_pipe(ialu_reg_reg);
13694 %}
13695 
13696 // And Register with Immediate 255
13697 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13698 %{
13699   match(Set dst (AndL src mask));
13700 
13701   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13702   ins_encode %{
13703     // movzbl zeroes out the upper 32-bit and does not need REX.W
13704     __ movzbl($dst$$Register, $src$$Register);
13705   %}
13706   ins_pipe(ialu_reg);
13707 %}
13708 
13709 // And Register with Immediate 65535
13710 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13711 %{
13712   match(Set dst (AndL src mask));
13713 
13714   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13715   ins_encode %{
13716     // movzwl zeroes out the upper 32-bit and does not need REX.W
13717     __ movzwl($dst$$Register, $src$$Register);
13718   %}
13719   ins_pipe(ialu_reg);
13720 %}
13721 
13722 // And Register with Immediate
13723 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13724 %{
13725   predicate(!UseAPX);
13726   match(Set dst (AndL dst src));
13727   effect(KILL cr);
13728   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13729 
13730   format %{ "andq    $dst, $src\t# long" %}
13731   ins_encode %{
13732     __ andq($dst$$Register, $src$$constant);
13733   %}
13734   ins_pipe(ialu_reg);
13735 %}
13736 
13737 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13738 %{
13739   predicate(UseAPX);
13740   match(Set dst (AndL src1 src2));
13741   effect(KILL cr);
13742   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13743 
13744   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13745   ins_encode %{
13746     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13747   %}
13748   ins_pipe(ialu_reg);
13749 %}
13750 
13751 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13752 %{
13753   predicate(UseAPX);
13754   match(Set dst (AndL (LoadL src1) src2));
13755   effect(KILL cr);
13756   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13757 
13758   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13759   ins_encode %{
13760     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13761   %}
13762   ins_pipe(ialu_reg);
13763 %}
13764 
13765 // And Register with Memory
13766 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13767 %{
13768   predicate(!UseAPX);
13769   match(Set dst (AndL dst (LoadL src)));
13770   effect(KILL cr);
13771   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13772 
13773   ins_cost(150);
13774   format %{ "andq    $dst, $src\t# long" %}
13775   ins_encode %{
13776     __ andq($dst$$Register, $src$$Address);
13777   %}
13778   ins_pipe(ialu_reg_mem);
13779 %}
13780 
13781 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13782 %{
13783   predicate(UseAPX);
13784   match(Set dst (AndL src1 (LoadL src2)));
13785   effect(KILL cr);
13786   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13787 
13788   ins_cost(150);
13789   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13790   ins_encode %{
13791     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13792   %}
13793   ins_pipe(ialu_reg_mem);
13794 %}
13795 
13796 // And Memory with Register
13797 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13798 %{
13799   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13800   effect(KILL cr);
13801   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13802 
13803   ins_cost(150);
13804   format %{ "andq    $dst, $src\t# long" %}
13805   ins_encode %{
13806     __ andq($dst$$Address, $src$$Register);
13807   %}
13808   ins_pipe(ialu_mem_reg);
13809 %}
13810 
13811 // And Memory with Immediate
13812 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13813 %{
13814   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13815   effect(KILL cr);
13816   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13817 
13818   ins_cost(125);
13819   format %{ "andq    $dst, $src\t# long" %}
13820   ins_encode %{
13821     __ andq($dst$$Address, $src$$constant);
13822   %}
13823   ins_pipe(ialu_mem_imm);
13824 %}
13825 
13826 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13827 %{
13828   // con should be a pure 64-bit immediate given that not(con) is a power of 2
13829   // because AND/OR works well enough for 8/32-bit values.
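  // e.g. "mem &= ~(1L << 40)" then collapses to a single btrq on bit 40.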
13830   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13831 
13832   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13833   effect(KILL cr);
13834 
13835   ins_cost(125);
13836   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13837   ins_encode %{
13838     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13839   %}
13840   ins_pipe(ialu_mem_imm);
13841 %}
13842 
13843 // BMI1 instructions
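// andn computes dst = ~src1 & src2 in one instruction; it is matched from the ((src1 ^ -1) & src2) shape below.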
13844 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13845   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13846   predicate(UseBMI1Instructions);
13847   effect(KILL cr);
13848   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13849 
13850   ins_cost(125);
13851   format %{ "andnq  $dst, $src1, $src2" %}
13852 
13853   ins_encode %{
13854     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13855   %}
13856   ins_pipe(ialu_reg_mem);
13857 %}
13858 
13859 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13860   match(Set dst (AndL (XorL src1 minus_1) src2));
13861   predicate(UseBMI1Instructions);
13862   effect(KILL cr);
13863   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13864 
13865   format %{ "andnq  $dst, $src1, $src2" %}
13866 
13867   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13869   %}
13870   ins_pipe(ialu_reg_mem);
13871 %}
13872 
13873 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13874   match(Set dst (AndL (SubL imm_zero src) src));
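  // i.e. dst = src & (-src): isolate the lowest set bit of src.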
13875   predicate(UseBMI1Instructions);
13876   effect(KILL cr);
13877   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13878 
13879   format %{ "blsiq  $dst, $src" %}
13880 
13881   ins_encode %{
13882     __ blsiq($dst$$Register, $src$$Register);
13883   %}
13884   ins_pipe(ialu_reg);
13885 %}
13886 
13887 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13888   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13889   predicate(UseBMI1Instructions);
13890   effect(KILL cr);
13891   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13892 
13893   ins_cost(125);
13894   format %{ "blsiq  $dst, $src" %}
13895 
13896   ins_encode %{
13897     __ blsiq($dst$$Register, $src$$Address);
13898   %}
13899   ins_pipe(ialu_reg_mem);
13900 %}
13901 
13902 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13903 %{
13904   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
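  // i.e. dst = src ^ (src - 1): mask of the lowest set bit and all bits below it.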
13905   predicate(UseBMI1Instructions);
13906   effect(KILL cr);
13907   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13908 
13909   ins_cost(125);
13910   format %{ "blsmskq $dst, $src" %}
13911 
13912   ins_encode %{
13913     __ blsmskq($dst$$Register, $src$$Address);
13914   %}
13915   ins_pipe(ialu_reg_mem);
13916 %}
13917 
13918 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13919 %{
13920   match(Set dst (XorL (AddL src minus_1) src));
13921   predicate(UseBMI1Instructions);
13922   effect(KILL cr);
13923   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13924 
13925   format %{ "blsmskq $dst, $src" %}
13926 
13927   ins_encode %{
13928     __ blsmskq($dst$$Register, $src$$Register);
13929   %}
13930 
13931   ins_pipe(ialu_reg);
13932 %}
13933 
13934 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13935 %{
13936   match(Set dst (AndL (AddL src minus_1) src) );
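  // i.e. dst = src & (src - 1): clear the lowest set bit of src.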
13937   predicate(UseBMI1Instructions);
13938   effect(KILL cr);
13939   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13940 
13941   format %{ "blsrq  $dst, $src" %}
13942 
13943   ins_encode %{
13944     __ blsrq($dst$$Register, $src$$Register);
13945   %}
13946 
13947   ins_pipe(ialu_reg);
13948 %}
13949 
13950 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13951 %{
13952   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13953   predicate(UseBMI1Instructions);
13954   effect(KILL cr);
13955   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13956 
13957   ins_cost(125);
13958   format %{ "blsrq  $dst, $src" %}
13959 
13960   ins_encode %{
13961     __ blsrq($dst$$Register, $src$$Address);
13962   %}
13963 
13964   ins_pipe(ialu_reg);
13965 %}
13966 
13967 // Or Instructions
13968 // Or Register with Register
13969 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13970 %{
13971   predicate(!UseAPX);
13972   match(Set dst (OrL dst src));
13973   effect(KILL cr);
13974   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13975 
13976   format %{ "orq     $dst, $src\t# long" %}
13977   ins_encode %{
13978     __ orq($dst$$Register, $src$$Register);
13979   %}
13980   ins_pipe(ialu_reg_reg);
13981 %}
13982 
13983 // Or Register with Register using New Data Destination (NDD)
13984 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13985 %{
13986   predicate(UseAPX);
13987   match(Set dst (OrL src1 src2));
13988   effect(KILL cr);
13989   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13990 
13991   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13992   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13995   %}
13996   ins_pipe(ialu_reg_reg);
13997 %}
13998 
13999 // Use any_RegP to match R15 (TLS register) without spilling.
14000 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
14002   effect(KILL cr);
14003   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14004 
14005   format %{ "orq     $dst, $src\t# long" %}
14006   ins_encode %{
14007     __ orq($dst$$Register, $src$$Register);
14008   %}
14009   ins_pipe(ialu_reg_reg);
14010 %}
14011 
14012 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
14014   effect(KILL cr);
14015   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14016 
14017   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14018   ins_encode %{
14019     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14020   %}
14021   ins_pipe(ialu_reg_reg);
14022 %}
14023 
14024 // Or Register with Immediate
14025 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14026 %{
14027   predicate(!UseAPX);
14028   match(Set dst (OrL dst src));
14029   effect(KILL cr);
14030   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14031 
14032   format %{ "orq     $dst, $src\t# long" %}
14033   ins_encode %{
14034     __ orq($dst$$Register, $src$$constant);
14035   %}
14036   ins_pipe(ialu_reg);
14037 %}
14038 
14039 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14040 %{
14041   predicate(UseAPX);
14042   match(Set dst (OrL src1 src2));
14043   effect(KILL cr);
14044   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14045 
14046   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14047   ins_encode %{
14048     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14049   %}
14050   ins_pipe(ialu_reg);
14051 %}
14052 
14053 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14054 %{
14055   predicate(UseAPX);
14056   match(Set dst (OrL src1 src2));
14057   effect(KILL cr);
14058   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14059 
14060   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14061   ins_encode %{
14062     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14063   %}
14064   ins_pipe(ialu_reg);
14065 %}
14066 
14067 // Or Memory with Immediate
14068 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14069 %{
14070   predicate(UseAPX);
14071   match(Set dst (OrL (LoadL src1) src2));
14072   effect(KILL cr);
14073   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14074 
14075   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14076   ins_encode %{
14077     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14078   %}
14079   ins_pipe(ialu_reg);
14080 %}
14081 
14082 // Or Register with Memory
14083 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14084 %{
14085   predicate(!UseAPX);
14086   match(Set dst (OrL dst (LoadL src)));
14087   effect(KILL cr);
14088   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14089 
14090   ins_cost(150);
14091   format %{ "orq     $dst, $src\t# long" %}
14092   ins_encode %{
14093     __ orq($dst$$Register, $src$$Address);
14094   %}
14095   ins_pipe(ialu_reg_mem);
14096 %}
14097 
14098 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14099 %{
14100   predicate(UseAPX);
14101   match(Set dst (OrL src1 (LoadL src2)));
14102   effect(KILL cr);
14103   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14104 
14105   ins_cost(150);
14106   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14107   ins_encode %{
14108     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14109   %}
14110   ins_pipe(ialu_reg_mem);
14111 %}
14112 
14113 // Or Memory with Register
14114 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14115 %{
14116   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14117   effect(KILL cr);
14118   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14119 
14120   ins_cost(150);
14121   format %{ "orq     $dst, $src\t# long" %}
14122   ins_encode %{
14123     __ orq($dst$$Address, $src$$Register);
14124   %}
14125   ins_pipe(ialu_mem_reg);
14126 %}
14127 
14128 // Or Memory with Immediate
14129 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14130 %{
14131   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14132   effect(KILL cr);
14133   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14134 
14135   ins_cost(125);
14136   format %{ "orq     $dst, $src\t# long" %}
14137   ins_encode %{
14138     __ orq($dst$$Address, $src$$constant);
14139   %}
14140   ins_pipe(ialu_mem_imm);
14141 %}
14142 
14143 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14144 %{
14145   // con should be a pure 64-bit power of 2 immediate
14146   // because AND/OR works well enough for 8/32-bit values.
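  // e.g. "mem |= (1L << 40)" then collapses to a single btsq on bit 40.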
14147   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14148 
14149   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14150   effect(KILL cr);
14151 
14152   ins_cost(125);
14153   format %{ "btsq    $dst, log2($con)\t# long" %}
14154   ins_encode %{
14155     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14156   %}
14157   ins_pipe(ialu_mem_imm);
14158 %}
14159 
14160 // Xor Instructions
14161 // Xor Register with Register
14162 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14163 %{
14164   predicate(!UseAPX);
14165   match(Set dst (XorL dst src));
14166   effect(KILL cr);
14167   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14168 
14169   format %{ "xorq    $dst, $src\t# long" %}
14170   ins_encode %{
14171     __ xorq($dst$$Register, $src$$Register);
14172   %}
14173   ins_pipe(ialu_reg_reg);
14174 %}
14175 
14176 // Xor Register with Register using New Data Destination (NDD)
14177 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14178 %{
14179   predicate(UseAPX);
14180   match(Set dst (XorL src1 src2));
14181   effect(KILL cr);
14182   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14183 
14184   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14185   ins_encode %{
14186     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14187   %}
14188   ins_pipe(ialu_reg_reg);
14189 %}
14190 
14191 // Xor Register with Immediate -1
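// notq does not modify the flags, so no rFlagsReg KILL is needed here.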
14192 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14193 %{
14194   predicate(!UseAPX);
14195   match(Set dst (XorL dst imm));
14196 
14197   format %{ "notq   $dst" %}
14198   ins_encode %{
14199      __ notq($dst$$Register);
14200   %}
14201   ins_pipe(ialu_reg);
14202 %}
14203 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14205 %{
14206   predicate(UseAPX);
14207   match(Set dst (XorL src imm));
14208   flag(PD::Flag_ndd_demotable_opr1);
14209 
14210   format %{ "enotq   $dst, $src" %}
14211   ins_encode %{
14212     __ enotq($dst$$Register, $src$$Register);
14213   %}
14214   ins_pipe(ialu_reg);
14215 %}
14216 
14217 // Xor Register with Immediate
14218 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14219 %{
14220   // Strict predicate check to make selection of xorL_rReg_im1 cost agnostic if immL32 src is -1.
14221   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14222   match(Set dst (XorL dst src));
14223   effect(KILL cr);
14224   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14225 
14226   format %{ "xorq    $dst, $src\t# long" %}
14227   ins_encode %{
14228     __ xorq($dst$$Register, $src$$constant);
14229   %}
14230   ins_pipe(ialu_reg);
14231 %}
14232 
14233 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14234 %{
14235   // Strict predicate check to make selection of xorL_rReg_im1_ndd cost agnostic if immL32 src2 is -1.
14236   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14237   match(Set dst (XorL src1 src2));
14238   effect(KILL cr);
14239   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14240 
14241   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14242   ins_encode %{
14243     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14244   %}
14245   ins_pipe(ialu_reg);
14246 %}
14247 
14248 // Xor Memory with Immediate
14249 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14250 %{
14251   predicate(UseAPX);
14252   match(Set dst (XorL (LoadL src1) src2));
14253   effect(KILL cr);
14254   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14255   ins_cost(150);
14256 
14257   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14258   ins_encode %{
14259     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14260   %}
14261   ins_pipe(ialu_reg);
14262 %}
14263 
14264 // Xor Register with Memory
14265 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14266 %{
14267   predicate(!UseAPX);
14268   match(Set dst (XorL dst (LoadL src)));
14269   effect(KILL cr);
14270   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14271 
14272   ins_cost(150);
14273   format %{ "xorq    $dst, $src\t# long" %}
14274   ins_encode %{
14275     __ xorq($dst$$Register, $src$$Address);
14276   %}
14277   ins_pipe(ialu_reg_mem);
14278 %}
14279 
14280 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14281 %{
14282   predicate(UseAPX);
14283   match(Set dst (XorL src1 (LoadL src2)));
14284   effect(KILL cr);
14285   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14286 
14287   ins_cost(150);
14288   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14289   ins_encode %{
14290     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14291   %}
14292   ins_pipe(ialu_reg_mem);
14293 %}
14294 
14295 // Xor Memory with Register
14296 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14297 %{
14298   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14299   effect(KILL cr);
14300   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14301 
14302   ins_cost(150);
14303   format %{ "xorq    $dst, $src\t# long" %}
14304   ins_encode %{
14305     __ xorq($dst$$Address, $src$$Register);
14306   %}
14307   ins_pipe(ialu_mem_reg);
14308 %}
14309 
14310 // Xor Memory with Immediate
14311 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14312 %{
14313   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14314   effect(KILL cr);
14315   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14316 
14317   ins_cost(125);
14318   format %{ "xorq    $dst, $src\t# long" %}
14319   ins_encode %{
14320     __ xorq($dst$$Address, $src$$constant);
14321   %}
14322   ins_pipe(ialu_mem_imm);
14323 %}
14324 
14325 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14326 %{
14327   match(Set dst (CmpLTMask p q));
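  // CmpLTMask produces -1 (all bits set) when p < q (signed) and 0 otherwise.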
14328   effect(KILL cr);
14329 
14330   ins_cost(400);
14331   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14332             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14333             "negl    $dst" %}
14334   ins_encode %{
14335     __ cmpl($p$$Register, $q$$Register);
14336     __ setcc(Assembler::less, $dst$$Register);
14337     __ negl($dst$$Register);
14338   %}
14339   ins_pipe(pipe_slow);
14340 %}
14341 
14342 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14343 %{
14344   match(Set dst (CmpLTMask dst zero));
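  // Comparing against zero reduces the mask to the sign of dst: sarl by 31 smears the sign bit into 0 or -1.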
14345   effect(KILL cr);
14346 
14347   ins_cost(100);
14348   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14349   ins_encode %{
14350     __ sarl($dst$$Register, 31);
14351   %}
14352   ins_pipe(ialu_reg);
14353 %}
14354 
14355 /* Better to save a register than avoid a branch */
14356 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14357 %{
14358   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
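  // i.e. p = (p - q) + ((p < q) ? y : 0), implemented with a branch instead of materializing the mask.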
14359   effect(KILL cr);
14360   ins_cost(300);
14361   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14362             "jge     done\n\t"
14363             "addl    $p,$y\n"
14364             "done:   " %}
14365   ins_encode %{
14366     Register Rp = $p$$Register;
14367     Register Rq = $q$$Register;
14368     Register Ry = $y$$Register;
14369     Label done;
14370     __ subl(Rp, Rq);
14371     __ jccb(Assembler::greaterEqual, done);
14372     __ addl(Rp, Ry);
14373     __ bind(done);
14374   %}
14375   ins_pipe(pipe_cmplt);
14376 %}
14377 
14378 /* Better to save a register than avoid a branch */
14379 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14380 %{
14381   match(Set y (AndI (CmpLTMask p q) y));
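  // i.e. y = (p < q) ? y : 0, again using a branch rather than materializing the mask.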
14382   effect(KILL cr);
14383 
14384   ins_cost(300);
14385 
14386   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14387             "jlt     done\n\t"
14388             "xorl    $y, $y\n"
14389             "done:   " %}
14390   ins_encode %{
14391     Register Rp = $p$$Register;
14392     Register Rq = $q$$Register;
14393     Register Ry = $y$$Register;
14394     Label done;
14395     __ cmpl(Rp, Rq);
14396     __ jccb(Assembler::less, done);
14397     __ xorl(Ry, Ry);
14398     __ bind(done);
14399   %}
14400   ins_pipe(pipe_cmplt);
14401 %}
14402 
14403 
14404 //---------- FP Instructions------------------------------------------------
14405 
14406 // Really expensive, avoid
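// ucomiss reports an unordered (NaN) compare as ZF=PF=CF=1; the pushfq/andq/popfq fixup below clears ZF and PF (keeping CF) so that case reads as a plain "below".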
14407 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14408 %{
14409   match(Set cr (CmpF src1 src2));
14410 
14411   ins_cost(500);
14412   format %{ "ucomiss $src1, $src2\n\t"
14413             "jnp,s   exit\n\t"
14414             "pushfq\t# saw NaN, set CF\n\t"
14415             "andq    [rsp], #0xffffff2b\n\t"
14416             "popfq\n"
14417     "exit:" %}
14418   ins_encode %{
14419     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14420     emit_cmpfp_fixup(masm);
14421   %}
14422   ins_pipe(pipe_slow);
14423 %}
14424 
14425 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14426   match(Set cr (CmpF src1 src2));
14427 
14428   ins_cost(100);
14429   format %{ "ucomiss $src1, $src2" %}
14430   ins_encode %{
14431     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14432   %}
14433   ins_pipe(pipe_slow);
14434 %}
14435 
14436 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14437   match(Set cr (CmpF src1 src2));
14438 
14439   ins_cost(100);
14440   format %{ "vucomxss $src1, $src2" %}
14441   ins_encode %{
14442     __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14443   %}
14444   ins_pipe(pipe_slow);
14445 %}
14446 
14447 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14448   match(Set cr (CmpF src1 (LoadF src2)));
14449 
14450   ins_cost(100);
14451   format %{ "ucomiss $src1, $src2" %}
14452   ins_encode %{
14453     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14454   %}
14455   ins_pipe(pipe_slow);
14456 %}
14457 
14458 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14459   match(Set cr (CmpF src1 (LoadF src2)));
14460 
14461   ins_cost(100);
14462   format %{ "vucomxss $src1, $src2" %}
14463   ins_encode %{
14464     __ vucomxss($src1$$XMMRegister, $src2$$Address);
14465   %}
14466   ins_pipe(pipe_slow);
14467 %}
14468 
14469 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14470   match(Set cr (CmpF src con));
14471 
14472   ins_cost(100);
14473   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14474   ins_encode %{
14475     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14476   %}
14477   ins_pipe(pipe_slow);
14478 %}
14479 
14480 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14481   match(Set cr (CmpF src con));
14482 
14483   ins_cost(100);
14484   format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14485   ins_encode %{
14486     __ vucomxss($src$$XMMRegister, $constantaddress($con));
14487   %}
14488   ins_pipe(pipe_slow);
14489 %}
14490 
14491 // Really expensive, avoid
14492 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14493 %{
14494   match(Set cr (CmpD src1 src2));
14495 
14496   ins_cost(500);
14497   format %{ "ucomisd $src1, $src2\n\t"
14498             "jnp,s   exit\n\t"
14499             "pushfq\t# saw NaN, set CF\n\t"
14500             "andq    [rsp], #0xffffff2b\n\t"
14501             "popfq\n"
14502     "exit:" %}
14503   ins_encode %{
14504     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14505     emit_cmpfp_fixup(masm);
14506   %}
14507   ins_pipe(pipe_slow);
14508 %}
14509 
14510 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14511   match(Set cr (CmpD src1 src2));
14512 
14513   ins_cost(100);
14514   format %{ "ucomisd $src1, $src2 test" %}
14515   ins_encode %{
14516     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14517   %}
14518   ins_pipe(pipe_slow);
14519 %}
14520 
14521 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14522   match(Set cr (CmpD src1 src2));
14523 
14524   ins_cost(100);
14525   format %{ "vucomxsd $src1, $src2 test" %}
14526   ins_encode %{
14527     __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14528   %}
14529   ins_pipe(pipe_slow);
14530 %}
14531 
14532 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14533   match(Set cr (CmpD src1 (LoadD src2)));
14534 
14535   ins_cost(100);
14536   format %{ "ucomisd $src1, $src2" %}
14537   ins_encode %{
14538     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14539   %}
14540   ins_pipe(pipe_slow);
14541 %}
14542 
14543 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14544   match(Set cr (CmpD src1 (LoadD src2)));
14545 
14546   ins_cost(100);
14547   format %{ "vucomxsd $src1, $src2" %}
14548   ins_encode %{
14549     __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14550   %}
14551   ins_pipe(pipe_slow);
14552 %}
14553 
14554 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14555   match(Set cr (CmpD src con));
14556   ins_cost(100);
14557   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14558   ins_encode %{
14559     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14560   %}
14561   ins_pipe(pipe_slow);
14562 %}
14563 
14564 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14565   match(Set cr (CmpD src con));
14566 
14567   ins_cost(100);
14568   format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14569   ins_encode %{
14570     __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14571   %}
14572   ins_pipe(pipe_slow);
14573 %}
14574 
14575 // Compare into -1,0,1
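// emit_cmpfp3 leaves -1 in dst when src1 < src2 or the compare is unordered (NaN), 0 when equal, and 1 when greater.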
14576 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14577 %{
14578   match(Set dst (CmpF3 src1 src2));
14579   effect(KILL cr);
14580 
14581   ins_cost(275);
14582   format %{ "ucomiss $src1, $src2\n\t"
14583             "movl    $dst, #-1\n\t"
14584             "jp,s    done\n\t"
14585             "jb,s    done\n\t"
14586             "setne   $dst\n\t"
14587             "movzbl  $dst, $dst\n"
14588     "done:" %}
14589   ins_encode %{
14590     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14591     emit_cmpfp3(masm, $dst$$Register);
14592   %}
14593   ins_pipe(pipe_slow);
14594 %}
14595 
14596 // Compare into -1,0,1
14597 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14598 %{
14599   match(Set dst (CmpF3 src1 (LoadF src2)));
14600   effect(KILL cr);
14601 
14602   ins_cost(275);
14603   format %{ "ucomiss $src1, $src2\n\t"
14604             "movl    $dst, #-1\n\t"
14605             "jp,s    done\n\t"
14606             "jb,s    done\n\t"
14607             "setne   $dst\n\t"
14608             "movzbl  $dst, $dst\n"
14609     "done:" %}
14610   ins_encode %{
14611     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14612     emit_cmpfp3(masm, $dst$$Register);
14613   %}
14614   ins_pipe(pipe_slow);
14615 %}
14616 
14617 // Compare into -1,0,1
14618 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14619   match(Set dst (CmpF3 src con));
14620   effect(KILL cr);
14621 
14622   ins_cost(275);
14623   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14624             "movl    $dst, #-1\n\t"
14625             "jp,s    done\n\t"
14626             "jb,s    done\n\t"
14627             "setne   $dst\n\t"
14628             "movzbl  $dst, $dst\n"
14629     "done:" %}
14630   ins_encode %{
14631     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14632     emit_cmpfp3(masm, $dst$$Register);
14633   %}
14634   ins_pipe(pipe_slow);
14635 %}
14636 
14637 // Compare into -1,0,1
14638 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14639 %{
14640   match(Set dst (CmpD3 src1 src2));
14641   effect(KILL cr);
14642 
14643   ins_cost(275);
14644   format %{ "ucomisd $src1, $src2\n\t"
14645             "movl    $dst, #-1\n\t"
14646             "jp,s    done\n\t"
14647             "jb,s    done\n\t"
14648             "setne   $dst\n\t"
14649             "movzbl  $dst, $dst\n"
14650     "done:" %}
14651   ins_encode %{
14652     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14653     emit_cmpfp3(masm, $dst$$Register);
14654   %}
14655   ins_pipe(pipe_slow);
14656 %}
14657 
14658 // Compare into -1,0,1
14659 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14660 %{
14661   match(Set dst (CmpD3 src1 (LoadD src2)));
14662   effect(KILL cr);
14663 
14664   ins_cost(275);
14665   format %{ "ucomisd $src1, $src2\n\t"
14666             "movl    $dst, #-1\n\t"
14667             "jp,s    done\n\t"
14668             "jb,s    done\n\t"
14669             "setne   $dst\n\t"
14670             "movzbl  $dst, $dst\n"
14671     "done:" %}
14672   ins_encode %{
14673     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14674     emit_cmpfp3(masm, $dst$$Register);
14675   %}
14676   ins_pipe(pipe_slow);
14677 %}
14678 
14679 // Compare into -1,0,1
14680 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14681   match(Set dst (CmpD3 src con));
14682   effect(KILL cr);
14683 
14684   ins_cost(275);
14685   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14686             "movl    $dst, #-1\n\t"
14687             "jp,s    done\n\t"
14688             "jb,s    done\n\t"
14689             "setne   $dst\n\t"
14690             "movzbl  $dst, $dst\n"
14691     "done:" %}
14692   ins_encode %{
14693     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14694     emit_cmpfp3(masm, $dst$$Register);
14695   %}
14696   ins_pipe(pipe_slow);
14697 %}
14698 
14699 //----------Arithmetic Conversion Instructions---------------------------------
14700 
14701 instruct convF2D_reg_reg(regD dst, regF src)
14702 %{
14703   match(Set dst (ConvF2D src));
14704 
14705   format %{ "cvtss2sd $dst, $src" %}
14706   ins_encode %{
14707     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14708   %}
14709   ins_pipe(pipe_slow); // XXX
14710 %}
14711 
14712 instruct convF2D_reg_mem(regD dst, memory src)
14713 %{
14714   predicate(UseAVX == 0);
14715   match(Set dst (ConvF2D (LoadF src)));
14716 
14717   format %{ "cvtss2sd $dst, $src" %}
14718   ins_encode %{
14719     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14720   %}
14721   ins_pipe(pipe_slow); // XXX
14722 %}
14723 
14724 instruct convD2F_reg_reg(regF dst, regD src)
14725 %{
14726   match(Set dst (ConvD2F src));
14727 
14728   format %{ "cvtsd2ss $dst, $src" %}
14729   ins_encode %{
14730     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14731   %}
14732   ins_pipe(pipe_slow); // XXX
14733 %}
14734 
14735 instruct convD2F_reg_mem(regF dst, memory src)
14736 %{
14737   predicate(UseAVX == 0);
14738   match(Set dst (ConvD2F (LoadD src)));
14739 
14740   format %{ "cvtsd2ss $dst, $src" %}
14741   ins_encode %{
14742     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14743   %}
14744   ins_pipe(pipe_slow); // XXX
14745 %}
14746 
14747 // XXX do mem variants
14748 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14749 %{
14750   predicate(!VM_Version::supports_avx10_2());
14751   match(Set dst (ConvF2I src));
14752   effect(KILL cr);
14753   format %{ "convert_f2i $dst, $src" %}
14754   ins_encode %{
14755     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14756   %}
14757   ins_pipe(pipe_slow);
14758 %}
14759 
14760 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14761 %{
14762   predicate(VM_Version::supports_avx10_2());
14763   match(Set dst (ConvF2I src));
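  // The AVX10.2 saturating truncating convert handles NaN and out-of-range inputs
  // the way Java requires, so the convertF2I fixup (and its flag kill) is not needed here.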
14764   format %{ "evcvttss2sisl $dst, $src" %}
14765   ins_encode %{
14766     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14767   %}
14768   ins_pipe(pipe_slow);
14769 %}
14770 
14771 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14772 %{
14773   predicate(VM_Version::supports_avx10_2());
14774   match(Set dst (ConvF2I (LoadF src)));
14775   format %{ "evcvttss2sisl $dst, $src" %}
14776   ins_encode %{
14777     __ evcvttss2sisl($dst$$Register, $src$$Address);
14778   %}
14779   ins_pipe(pipe_slow);
14780 %}
14781 
14782 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14783 %{
14784   predicate(!VM_Version::supports_avx10_2());
14785   match(Set dst (ConvF2L src));
14786   effect(KILL cr);
14787   format %{ "convert_f2l $dst, $src"%}
14788   ins_encode %{
14789     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14790   %}
14791   ins_pipe(pipe_slow);
14792 %}
14793 
14794 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14795 %{
14796   predicate(VM_Version::supports_avx10_2());
14797   match(Set dst (ConvF2L src));
14798   format %{ "evcvttss2sisq $dst, $src" %}
14799   ins_encode %{
14800     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14801   %}
14802   ins_pipe(pipe_slow);
14803 %}
14804 
14805 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14806 %{
14807   predicate(VM_Version::supports_avx10_2());
14808   match(Set dst (ConvF2L (LoadF src)));
14809   format %{ "evcvttss2sisq $dst, $src" %}
14810   ins_encode %{
14811     __ evcvttss2sisq($dst$$Register, $src$$Address);
14812   %}
14813   ins_pipe(pipe_slow);
14814 %}
14815 
14816 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14817 %{
14818   predicate(!VM_Version::supports_avx10_2());
14819   match(Set dst (ConvD2I src));
14820   effect(KILL cr);
14821   format %{ "convert_d2i $dst, $src"%}
14822   ins_encode %{
14823     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14824   %}
14825   ins_pipe(pipe_slow);
14826 %}
14827 
14828 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14829 %{
14830   predicate(VM_Version::supports_avx10_2());
14831   match(Set dst (ConvD2I src));
14832   format %{ "evcvttsd2sisl $dst, $src" %}
14833   ins_encode %{
14834     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14835   %}
14836   ins_pipe(pipe_slow);
14837 %}
14838 
14839 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14840 %{
14841   predicate(VM_Version::supports_avx10_2());
14842   match(Set dst (ConvD2I (LoadD src)));
14843   format %{ "evcvttsd2sisl $dst, $src" %}
14844   ins_encode %{
14845     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14846   %}
14847   ins_pipe(pipe_slow);
14848 %}
14849 
14850 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14851 %{
14852   predicate(!VM_Version::supports_avx10_2());
14853   match(Set dst (ConvD2L src));
14854   effect(KILL cr);
14855   format %{ "convert_d2l $dst, $src"%}
14856   ins_encode %{
14857     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14858   %}
14859   ins_pipe(pipe_slow);
14860 %}
14861 
14862 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14863 %{
14864   predicate(VM_Version::supports_avx10_2());
14865   match(Set dst (ConvD2L src));
14866   format %{ "evcvttsd2sisq $dst, $src" %}
14867   ins_encode %{
14868     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14869   %}
14870   ins_pipe(pipe_slow);
14871 %}
14872 
14873 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14874 %{
14875   predicate(VM_Version::supports_avx10_2());
14876   match(Set dst (ConvD2L (LoadD src)));
14877   format %{ "evcvttsd2sisq $dst, $src" %}
14878   ins_encode %{
14879     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14880   %}
14881   ins_pipe(pipe_slow);
14882 %}
14883 
14884 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14885 %{
14886   match(Set dst (RoundD src));
14887   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14888   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14889   ins_encode %{
14890     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14891   %}
14892   ins_pipe(pipe_slow);
14893 %}
14894 
14895 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14896 %{
14897   match(Set dst (RoundF src));
14898   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14899   format %{ "round_float $dst,$src" %}
14900   ins_encode %{
14901     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14902   %}
14903   ins_pipe(pipe_slow);
14904 %}
14905 
14906 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14907 %{
14908   predicate(!UseXmmI2F);
14909   match(Set dst (ConvI2F src));
14910 
14911   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14912   ins_encode %{
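    // cvtsi2ssl only writes the low element; on AVX, clear the destination
    // first to break the false dependency on its previous contents.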
14913     if (UseAVX > 0) {
14914       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14915     }
14916     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14917   %}
14918   ins_pipe(pipe_slow); // XXX
14919 %}
14920 
14921 instruct convI2F_reg_mem(regF dst, memory src)
14922 %{
14923   predicate(UseAVX == 0);
14924   match(Set dst (ConvI2F (LoadI src)));
14925 
14926   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14927   ins_encode %{
14928     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14929   %}
14930   ins_pipe(pipe_slow); // XXX
14931 %}
14932 
14933 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14934 %{
14935   predicate(!UseXmmI2D);
14936   match(Set dst (ConvI2D src));
14937 
14938   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14939   ins_encode %{
14940     if (UseAVX > 0) {
14941       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14942     }
14943     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14944   %}
14945   ins_pipe(pipe_slow); // XXX
14946 %}
14947 
14948 instruct convI2D_reg_mem(regD dst, memory src)
14949 %{
14950   predicate(UseAVX == 0);
14951   match(Set dst (ConvI2D (LoadI src)));
14952 
14953   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14954   ins_encode %{
14955     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14956   %}
14957   ins_pipe(pipe_slow); // XXX
14958 %}
14959 
14960 instruct convXI2F_reg(regF dst, rRegI src)
14961 %{
14962   predicate(UseXmmI2F);
14963   match(Set dst (ConvI2F src));
14964 
14965   format %{ "movdl $dst, $src\n\t"
14966             "cvtdq2psl $dst, $dst\t# i2f" %}
14967   ins_encode %{
14968     __ movdl($dst$$XMMRegister, $src$$Register);
14969     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14970   %}
14971   ins_pipe(pipe_slow); // XXX
14972 %}
14973 
14974 instruct convXI2D_reg(regD dst, rRegI src)
14975 %{
14976   predicate(UseXmmI2D);
14977   match(Set dst (ConvI2D src));
14978 
14979   format %{ "movdl $dst, $src\n\t"
14980             "cvtdq2pdl $dst, $dst\t# i2d" %}
14981   ins_encode %{
14982     __ movdl($dst$$XMMRegister, $src$$Register);
14983     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14984   %}
14985   ins_pipe(pipe_slow); // XXX
14986 %}
14987 
14988 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14989 %{
14990   match(Set dst (ConvL2F src));
14991 
14992   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14993   ins_encode %{
14994     if (UseAVX > 0) {
14995       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14996     }
14997     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14998   %}
14999   ins_pipe(pipe_slow); // XXX
15000 %}
15001 
15002 instruct convL2F_reg_mem(regF dst, memory src)
15003 %{
15004   predicate(UseAVX == 0);
15005   match(Set dst (ConvL2F (LoadL src)));
15006 
15007   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15008   ins_encode %{
15009     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
15010   %}
15011   ins_pipe(pipe_slow); // XXX
15012 %}
15013 
15014 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
15015 %{
15016   match(Set dst (ConvL2D src));
15017 
15018   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15019   ins_encode %{
15020     if (UseAVX > 0) {
15021       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15022     }
15023     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
15024   %}
15025   ins_pipe(pipe_slow); // XXX
15026 %}
15027 
15028 instruct convL2D_reg_mem(regD dst, memory src)
15029 %{
15030   predicate(UseAVX == 0);
15031   match(Set dst (ConvL2D (LoadL src)));
15032 
15033   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15034   ins_encode %{
15035     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15036   %}
15037   ins_pipe(pipe_slow); // XXX
15038 %}
15039 
15040 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15041 %{
15042   match(Set dst (ConvI2L src));
15043 
15044   ins_cost(125);
15045   format %{ "movslq  $dst, $src\t# i2l" %}
15046   ins_encode %{
15047     __ movslq($dst$$Register, $src$$Register);
15048   %}
15049   ins_pipe(ialu_reg_reg);
15050 %}
15051 
15052 // Zero-extend convert int to long
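// Writing a 32-bit register implicitly zeroes its upper 32 bits, so a plain movl suffices (and is elided when dst == src).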
15053 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15054 %{
15055   match(Set dst (AndL (ConvI2L src) mask));
15056 
15057   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15058   ins_encode %{
15059     if ($dst$$reg != $src$$reg) {
15060       __ movl($dst$$Register, $src$$Register);
15061     }
15062   %}
15063   ins_pipe(ialu_reg_reg);
15064 %}
15065 
15066 // Zero-extend convert int to long
15067 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15068 %{
15069   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15070 
15071   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15072   ins_encode %{
15073     __ movl($dst$$Register, $src$$Address);
15074   %}
15075   ins_pipe(ialu_reg_mem);
15076 %}
15077 
15078 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15079 %{
15080   match(Set dst (AndL src mask));
15081 
15082   format %{ "movl    $dst, $src\t# zero-extend long" %}
15083   ins_encode %{
15084     __ movl($dst$$Register, $src$$Register);
15085   %}
15086   ins_pipe(ialu_reg_reg);
15087 %}
15088 
15089 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15090 %{
15091   match(Set dst (ConvL2I src));
15092 
15093   format %{ "movl    $dst, $src\t# l2i" %}
15094   ins_encode %{
15095     __ movl($dst$$Register, $src$$Register);
15096   %}
15097   ins_pipe(ialu_reg_reg);
15098 %}
15099 
15100 
15101 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15102   match(Set dst (MoveF2I src));
15103   effect(DEF dst, USE src);
15104 
15105   ins_cost(125);
15106   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15107   ins_encode %{
15108     __ movl($dst$$Register, Address(rsp, $src$$disp));
15109   %}
15110   ins_pipe(ialu_reg_mem);
15111 %}
15112 
15113 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15114   match(Set dst (MoveI2F src));
15115   effect(DEF dst, USE src);
15116 
15117   ins_cost(125);
15118   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15119   ins_encode %{
15120     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15121   %}
15122   ins_pipe(pipe_slow);
15123 %}
15124 
15125 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15126   match(Set dst (MoveD2L src));
15127   effect(DEF dst, USE src);
15128 
15129   ins_cost(125);
15130   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15131   ins_encode %{
15132     __ movq($dst$$Register, Address(rsp, $src$$disp));
15133   %}
15134   ins_pipe(ialu_reg_mem);
15135 %}
15136 
15137 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15138   predicate(!UseXmmLoadAndClearUpper);
15139   match(Set dst (MoveL2D src));
15140   effect(DEF dst, USE src);
15141 
15142   ins_cost(125);
15143   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15144   ins_encode %{
15145     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15146   %}
15147   ins_pipe(pipe_slow);
15148 %}
15149 
15150 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15151   predicate(UseXmmLoadAndClearUpper);
15152   match(Set dst (MoveL2D src));
15153   effect(DEF dst, USE src);
15154 
15155   ins_cost(125);
15156   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15157   ins_encode %{
15158     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15159   %}
15160   ins_pipe(pipe_slow);
15161 %}
15162 
15163 
15164 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15165   match(Set dst (MoveF2I src));
15166   effect(DEF dst, USE src);
15167 
15168   ins_cost(95); // XXX
15169   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15170   ins_encode %{
15171     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15172   %}
15173   ins_pipe(pipe_slow);
15174 %}
15175 
15176 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15177   match(Set dst (MoveI2F src));
15178   effect(DEF dst, USE src);
15179 
15180   ins_cost(100);
15181   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15182   ins_encode %{
15183     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15184   %}
15185   ins_pipe( ialu_mem_reg );
15186 %}
15187 
15188 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15189   match(Set dst (MoveD2L src));
15190   effect(DEF dst, USE src);
15191 
15192   ins_cost(95); // XXX
15193   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
15194   ins_encode %{
15195     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15196   %}
15197   ins_pipe(pipe_slow);
15198 %}
15199 
15200 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15201   match(Set dst (MoveL2D src));
15202   effect(DEF dst, USE src);
15203 
15204   ins_cost(100);
15205   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15206   ins_encode %{
15207     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15208   %}
15209   ins_pipe(ialu_mem_reg);
15210 %}
15211 
15212 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15213   match(Set dst (MoveF2I src));
15214   effect(DEF dst, USE src);
15215   ins_cost(85);
15216   format %{ "movd    $dst,$src\t# MoveF2I" %}
15217   ins_encode %{
15218     __ movdl($dst$$Register, $src$$XMMRegister);
15219   %}
15220   ins_pipe( pipe_slow );
15221 %}
15222 
15223 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15224   match(Set dst (MoveD2L src));
15225   effect(DEF dst, USE src);
15226   ins_cost(85);
15227   format %{ "movd    $dst,$src\t# MoveD2L" %}
15228   ins_encode %{
15229     __ movdq($dst$$Register, $src$$XMMRegister);
15230   %}
15231   ins_pipe( pipe_slow );
15232 %}
15233 
15234 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15235   match(Set dst (MoveI2F src));
15236   effect(DEF dst, USE src);
15237   ins_cost(100);
15238   format %{ "movd    $dst,$src\t# MoveI2F" %}
15239   ins_encode %{
15240     __ movdl($dst$$XMMRegister, $src$$Register);
15241   %}
15242   ins_pipe( pipe_slow );
15243 %}
15244 
15245 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15246   match(Set dst (MoveL2D src));
15247   effect(DEF dst, USE src);
15248   ins_cost(100);
15249   format %{ "movd    $dst,$src\t# MoveL2D" %}
15250   ins_encode %{
15251      __ movdq($dst$$XMMRegister, $src$$Register);
15252   %}
15253   ins_pipe( pipe_slow );
15254 %}
15255 
15256 
15257 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15259 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15260                   Universe dummy, rFlagsReg cr)
15261 %{
15262   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15263   match(Set dummy (ClearArray (Binary cnt base) val));
15264   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15265 
15266   format %{ $$template
15267     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15268     $$emit$$"jg      LARGE\n\t"
15269     $$emit$$"dec     rcx\n\t"
15270     $$emit$$"js      DONE\t# Zero length\n\t"
15271     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15272     $$emit$$"dec     rcx\n\t"
15273     $$emit$$"jge     LOOP\n\t"
15274     $$emit$$"jmp     DONE\n\t"
15275     $$emit$$"# LARGE:\n\t"
15276     if (UseFastStosb) {
15277        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15278        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15279     } else if (UseXMMForObjInit) {
15280        $$emit$$"movdq   $tmp, $val\n\t"
15281        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15282        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15283        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15284        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15285        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15286        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15287        $$emit$$"add     0x40,rax\n\t"
15288        $$emit$$"# L_zero_64_bytes:\n\t"
15289        $$emit$$"sub     0x8,rcx\n\t"
15290        $$emit$$"jge     L_loop\n\t"
15291        $$emit$$"add     0x4,rcx\n\t"
15292        $$emit$$"jl      L_tail\n\t"
15293        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15294        $$emit$$"add     0x20,rax\n\t"
15295        $$emit$$"sub     0x4,rcx\n\t"
15296        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15297        $$emit$$"add     0x4,rcx\n\t"
15298        $$emit$$"jle     L_end\n\t"
15299        $$emit$$"dec     rcx\n\t"
15300        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15301        $$emit$$"vmovq   xmm0,(rax)\n\t"
15302        $$emit$$"add     0x8,rax\n\t"
15303        $$emit$$"dec     rcx\n\t"
15304        $$emit$$"jge     L_sloop\n\t"
15305        $$emit$$"# L_end:\n\t"
15306     } else {
15307        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15308     }
15309     $$emit$$"# DONE"
15310   %}
15311   ins_encode %{
15312     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15313                  $tmp$$XMMRegister, false, false);
15314   %}
15315   ins_pipe(pipe_slow);
15316 %}
15317 
15318 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15319                             Universe dummy, rFlagsReg cr)
15320 %{
15321   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15322   match(Set dummy (ClearArray (Binary cnt base) val));
15323   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15324 
15325   format %{ $$template
15326     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15327     $$emit$$"jg      LARGE\n\t"
15328     $$emit$$"dec     rcx\n\t"
15329     $$emit$$"js      DONE\t# Zero length\n\t"
15330     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15331     $$emit$$"dec     rcx\n\t"
15332     $$emit$$"jge     LOOP\n\t"
15333     $$emit$$"jmp     DONE\n\t"
15334     $$emit$$"# LARGE:\n\t"
15335     if (UseXMMForObjInit) {
15336        $$emit$$"movdq   $tmp, $val\n\t"
15337        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15338        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15339        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15340        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15341        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15342        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15343        $$emit$$"add     0x40,rax\n\t"
15344        $$emit$$"# L_zero_64_bytes:\n\t"
15345        $$emit$$"sub     0x8,rcx\n\t"
15346        $$emit$$"jge     L_loop\n\t"
15347        $$emit$$"add     0x4,rcx\n\t"
15348        $$emit$$"jl      L_tail\n\t"
15349        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15350        $$emit$$"add     0x20,rax\n\t"
15351        $$emit$$"sub     0x4,rcx\n\t"
15352        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15353        $$emit$$"add     0x4,rcx\n\t"
15354        $$emit$$"jle     L_end\n\t"
15355        $$emit$$"dec     rcx\n\t"
15356        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15357        $$emit$$"vmovq   xmm0,(rax)\n\t"
15358        $$emit$$"add     0x8,rax\n\t"
15359        $$emit$$"dec     rcx\n\t"
15360        $$emit$$"jge     L_sloop\n\t"
15361        $$emit$$"# L_end:\n\t"
15362     } else {
15363        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15364     }
15365     $$emit$$"# DONE"
15366   %}
15367   ins_encode %{
15368     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15369                  $tmp$$XMMRegister, false, true);
15370   %}
15371   ins_pipe(pipe_slow);
15372 %}
15373 
15374 // Small non-constant length ClearArray for AVX512 targets.
15375 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15376                        Universe dummy, rFlagsReg cr)
15377 %{
15378   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15379   match(Set dummy (ClearArray (Binary cnt base) val));
15380   ins_cost(125);
15381   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15382 
15383   format %{ $$template
15384     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15385     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15386     $$emit$$"jg      LARGE\n\t"
15387     $$emit$$"dec     rcx\n\t"
15388     $$emit$$"js      DONE\t# Zero length\n\t"
15389     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15390     $$emit$$"dec     rcx\n\t"
15391     $$emit$$"jge     LOOP\n\t"
15392     $$emit$$"jmp     DONE\n\t"
15393     $$emit$$"# LARGE:\n\t"
15394     if (UseFastStosb) {
15395        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15396        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15397     } else if (UseXMMForObjInit) {
15398        $$emit$$"mov     rdi,rax\n\t"
15399        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15400        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15401        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15402        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15403        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15404        $$emit$$"add     0x40,rax\n\t"
15405        $$emit$$"# L_zero_64_bytes:\n\t"
15406        $$emit$$"sub     0x8,rcx\n\t"
15407        $$emit$$"jge     L_loop\n\t"
15408        $$emit$$"add     0x4,rcx\n\t"
15409        $$emit$$"jl      L_tail\n\t"
15410        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15411        $$emit$$"add     0x20,rax\n\t"
15412        $$emit$$"sub     0x4,rcx\n\t"
15413        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15414        $$emit$$"add     0x4,rcx\n\t"
15415        $$emit$$"jle     L_end\n\t"
15416        $$emit$$"dec     rcx\n\t"
15417        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15418        $$emit$$"vmovq   xmm0,(rax)\n\t"
15419        $$emit$$"add     0x8,rax\n\t"
15420        $$emit$$"dec     rcx\n\t"
15421        $$emit$$"jge     L_sloop\n\t"
15422        $$emit$$"# L_end:\n\t"
15423     } else {
15424        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15425     }
15426     $$emit$$"# DONE"
15427   %}
15428   ins_encode %{
15429     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15430                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15431   %}
15432   ins_pipe(pipe_slow);
15433 %}
15434 
15435 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15436                                  Universe dummy, rFlagsReg cr)
15437 %{
15438   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15439   match(Set dummy (ClearArray (Binary cnt base) val));
15440   ins_cost(125);
15441   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15442 
15443   format %{ $$template
15444     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15445     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15446     $$emit$$"jg      LARGE\n\t"
15447     $$emit$$"dec     rcx\n\t"
15448     $$emit$$"js      DONE\t# Zero length\n\t"
15449     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15450     $$emit$$"dec     rcx\n\t"
15451     $$emit$$"jge     LOOP\n\t"
15452     $$emit$$"jmp     DONE\n\t"
15453     $$emit$$"# LARGE:\n\t"
15454     if (UseFastStosb) {
15455        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15456        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15457     } else if (UseXMMForObjInit) {
15458        $$emit$$"mov     rdi,rax\n\t"
15459        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15460        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15461        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15462        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15463        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15464        $$emit$$"add     0x40,rax\n\t"
15465        $$emit$$"# L_zero_64_bytes:\n\t"
15466        $$emit$$"sub     0x8,rcx\n\t"
15467        $$emit$$"jge     L_loop\n\t"
15468        $$emit$$"add     0x4,rcx\n\t"
15469        $$emit$$"jl      L_tail\n\t"
15470        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15471        $$emit$$"add     0x20,rax\n\t"
15472        $$emit$$"sub     0x4,rcx\n\t"
15473        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15474        $$emit$$"add     0x4,rcx\n\t"
15475        $$emit$$"jle     L_end\n\t"
15476        $$emit$$"dec     rcx\n\t"
15477        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15478        $$emit$$"vmovq   xmm0,(rax)\n\t"
15479        $$emit$$"add     0x8,rax\n\t"
15480        $$emit$$"dec     rcx\n\t"
15481        $$emit$$"jge     L_sloop\n\t"
15482        $$emit$$"# L_end:\n\t"
15483     } else {
15484        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15485     }
15486     $$emit$$"# DONE"
15487   %}
15488   ins_encode %{
15489     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15490                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15491   %}
15492   ins_pipe(pipe_slow);
15493 %}
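
// The _evex variants above reserve a kReg temporary in addition to the XMM one.
// Presumably clear_mem() uses it on AVX-512 targets to finish the unaligned tail
// with a masked store rather than the scalar 8-byte loop shown in the (AVX2-style)
// format strings; illustrative pseudo-assembly only, not the exact emitted code:
//
//   kmovq      k1, tail_mask          // one mask bit per remaining 8-byte word
//   evmovdqu64 [rax]{k1}, zmm0        // masked store of the final partial chunk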
15494 
15495 // Large non-constant length ClearArray for non-AVX512 targets.
15496 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15497                         Universe dummy, rFlagsReg cr)
15498 %{
15499   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15500   match(Set dummy (ClearArray (Binary cnt base) val));
15501   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15502 
15503   format %{ $$template
15504     if (UseFastStosb) {
15505        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15506        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15507     } else if (UseXMMForObjInit) {
15508        $$emit$$"movdq   $tmp, $val\n\t"
15509        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15510        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15511        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15512        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15513        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15514        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15515        $$emit$$"add     0x40,rax\n\t"
15516        $$emit$$"# L_zero_64_bytes:\n\t"
15517        $$emit$$"sub     0x8,rcx\n\t"
15518        $$emit$$"jge     L_loop\n\t"
15519        $$emit$$"add     0x4,rcx\n\t"
15520        $$emit$$"jl      L_tail\n\t"
15521        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15522        $$emit$$"add     0x20,rax\n\t"
15523        $$emit$$"sub     0x4,rcx\n\t"
15524        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15525        $$emit$$"add     0x4,rcx\n\t"
15526        $$emit$$"jle     L_end\n\t"
15527        $$emit$$"dec     rcx\n\t"
15528        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15529        $$emit$$"vmovq   xmm0,(rax)\n\t"
15530        $$emit$$"add     0x8,rax\n\t"
15531        $$emit$$"dec     rcx\n\t"
15532        $$emit$$"jge     L_sloop\n\t"
15533        $$emit$$"# L_end:\n\t"
15534     } else {
15535        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15536     }
15537   %}
15538   ins_encode %{
15539     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15540                  $tmp$$XMMRegister, true, false);
15541   %}
15542   ins_pipe(pipe_slow);
15543 %}
15544 
15545 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15546                                   Universe dummy, rFlagsReg cr)
15547 %{
15548   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15549   match(Set dummy (ClearArray (Binary cnt base) val));
15550   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15551 
15552   format %{ $$template
15553     if (UseXMMForObjInit) {
15554        $$emit$$"movdq   $tmp, $val\n\t"
15555        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15556        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15557        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15558        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15559        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15560        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15561        $$emit$$"add     0x40,rax\n\t"
15562        $$emit$$"# L_zero_64_bytes:\n\t"
15563        $$emit$$"sub     0x8,rcx\n\t"
15564        $$emit$$"jge     L_loop\n\t"
15565        $$emit$$"add     0x4,rcx\n\t"
15566        $$emit$$"jl      L_tail\n\t"
15567        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15568        $$emit$$"add     0x20,rax\n\t"
15569        $$emit$$"sub     0x4,rcx\n\t"
15570        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15571        $$emit$$"add     0x4,rcx\n\t"
15572        $$emit$$"jle     L_end\n\t"
15573        $$emit$$"dec     rcx\n\t"
15574        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15575        $$emit$$"vmovq   xmm0,(rax)\n\t"
15576        $$emit$$"add     0x8,rax\n\t"
15577        $$emit$$"dec     rcx\n\t"
15578        $$emit$$"jge     L_sloop\n\t"
15579        $$emit$$"# L_end:\n\t"
15580     } else {
15581        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15582     }
15583   %}
15584   ins_encode %{
15585     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15586                  $tmp$$XMMRegister, true, true);
15587   %}
15588   ins_pipe(pipe_slow);
15589 %}
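
// Unlike rep_stos_large above, rep_stos_large_word_copy leaves the UseFastStosb
// "rep stosb" branch out of its format: rep stosb fills one byte at a time, which
// presumably is not acceptable when the clear must be performed in whole 8-byte
// words (word_copy_only()).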
15590 
15591 // Large non-constant length ClearArray for AVX512 targets.
15592 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15593                              Universe dummy, rFlagsReg cr)
15594 %{
15595   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15596   match(Set dummy (ClearArray (Binary cnt base) val));
15597   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15598 
15599   format %{ $$template
15600     if (UseFastStosb) {
15601        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15602        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15603        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15604     } else if (UseXMMForObjInit) {
15605        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15606        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15607        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15608        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15609        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15610        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15611        $$emit$$"add     0x40,rax\n\t"
15612        $$emit$$"# L_zero_64_bytes:\n\t"
15613        $$emit$$"sub     0x8,rcx\n\t"
15614        $$emit$$"jge     L_loop\n\t"
15615        $$emit$$"add     0x4,rcx\n\t"
15616        $$emit$$"jl      L_tail\n\t"
15617        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15618        $$emit$$"add     0x20,rax\n\t"
15619        $$emit$$"sub     0x4,rcx\n\t"
15620        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15621        $$emit$$"add     0x4,rcx\n\t"
15622        $$emit$$"jle     L_end\n\t"
15623        $$emit$$"dec     rcx\n\t"
15624        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15625        $$emit$$"vmovq   xmm0,(rax)\n\t"
15626        $$emit$$"add     0x8,rax\n\t"
15627        $$emit$$"dec     rcx\n\t"
15628        $$emit$$"jge     L_sloop\n\t"
15629        $$emit$$"# L_end:\n\t"
15630     } else {
15631        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15632        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15633     }
15634   %}
15635   ins_encode %{
15636     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15637                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15638   %}
15639   ins_pipe(pipe_slow);
15640 %}
15641 
15642 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15643                                        Universe dummy, rFlagsReg cr)
15644 %{
15645   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15646   match(Set dummy (ClearArray (Binary cnt base) val));
15647   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15648 
15649   format %{ $$template
15650     if (UseFastStosb) {
15651        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15652        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15653        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15654     } else if (UseXMMForObjInit) {
15655        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15656        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15657        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15658        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15659        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15660        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15661        $$emit$$"add     0x40,rax\n\t"
15662        $$emit$$"# L_zero_64_bytes:\n\t"
15663        $$emit$$"sub     0x8,rcx\n\t"
15664        $$emit$$"jge     L_loop\n\t"
15665        $$emit$$"add     0x4,rcx\n\t"
15666        $$emit$$"jl      L_tail\n\t"
15667        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15668        $$emit$$"add     0x20,rax\n\t"
15669        $$emit$$"sub     0x4,rcx\n\t"
15670        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15671        $$emit$$"add     0x4,rcx\n\t"
15672        $$emit$$"jle     L_end\n\t"
15673        $$emit$$"dec     rcx\n\t"
15674        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15675        $$emit$$"vmovq   xmm0,(rax)\n\t"
15676        $$emit$$"add     0x8,rax\n\t"
15677        $$emit$$"dec     rcx\n\t"
15678        $$emit$$"jge     L_sloop\n\t"
15679        $$emit$$"# L_end:\n\t"
15680     } else {
15681        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15682        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15683     }
15684   %}
15685   ins_encode %{
15686     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15687                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15688   %}
15689   ins_pipe(pipe_slow);
15690 %}
15691 
15692 // Small constant length ClearArray for AVX512 targets.
15693 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15694 %{
15695   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15696             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15697   match(Set dummy (ClearArray (Binary cnt base) val));
15698   ins_cost(100);
15699   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15700   format %{ "clear_mem_imm $base, $cnt" %}
15701   ins_encode %{
15702     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15703   %}
15704   ins_pipe(pipe_slow);
15705 %}
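
// In the constant-length form the count is an immL, so clear_mem() is handed
// $cnt$$constant directly and can presumably emit a fixed, loop-free store
// sequence; the AVX512VL/MaxVectorSize >= 32 predicate suggests the kReg is used
// for masked vector stores covering an arbitrary small constant. Illustratively
// (hypothetical operands): clear_mem(base, 5 /*words*/, val, xmm_tmp, ktmp)
// clears 40 bytes without a runtime loop.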
15706 
15707 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15708                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15709 %{
15710   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15711   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15712   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15713 
15714   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15715   ins_encode %{
15716     __ string_compare($str1$$Register, $str2$$Register,
15717                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15718                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15719   %}
15720   ins_pipe( pipe_slow );
15721 %}
15722 
15723 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15724                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15725 %{
15726   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15727   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15728   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15729 
15730   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15731   ins_encode %{
15732     __ string_compare($str1$$Register, $str2$$Register,
15733                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15734                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15735   %}
15736   ins_pipe( pipe_slow );
15737 %}
15738 
15739 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15740                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15741 %{
15742   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15743   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15744   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15745 
15746   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15747   ins_encode %{
15748     __ string_compare($str1$$Register, $str2$$Register,
15749                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15750                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15751   %}
15752   ins_pipe( pipe_slow );
15753 %}
15754 
15755 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15756                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15757 %{
15758   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15759   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15760   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15761 
15762   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15763   ins_encode %{
15764     __ string_compare($str1$$Register, $str2$$Register,
15765                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15766                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15767   %}
15768   ins_pipe( pipe_slow );
15769 %}
15770 
15771 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15772                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15773 %{
15774   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15775   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15776   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15777 
15778   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15779   ins_encode %{
15780     __ string_compare($str1$$Register, $str2$$Register,
15781                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15782                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15783   %}
15784   ins_pipe( pipe_slow );
15785 %}
15786 
15787 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15788                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15789 %{
15790   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15791   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15792   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15793 
15794   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15795   ins_encode %{
15796     __ string_compare($str1$$Register, $str2$$Register,
15797                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15798                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15799   %}
15800   ins_pipe( pipe_slow );
15801 %}
15802 
15803 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15804                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15805 %{
15806   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15807   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15808   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15809 
15810   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15811   ins_encode %{
15812     __ string_compare($str2$$Register, $str1$$Register,
15813                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15814                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15815   %}
15816   ins_pipe( pipe_slow );
15817 %}
15818 
15819 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15820                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15821 %{
15822   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15823   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15824   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15825 
15826   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15827   ins_encode %{
15828     __ string_compare($str2$$Register, $str1$$Register,
15829                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15830                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15831   %}
15832   ins_pipe( pipe_slow );
15833 %}
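
// The LL/UU/LU/UL suffixes name the operand encodings passed to string_compare():
// L = Latin-1 (one byte per element), U = UTF-16 (two bytes), first letter for
// str1 and second for str2. The UL variants hand (str2, str1) and (cnt2, cnt1) to
// string_compare(); presumably the stub only implements one mixed-encoding
// argument order and accounts for the swap when producing the signed result.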
15834 
15835 // fast search of substring with known size.
15836 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15837                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15838 %{
15839   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15840   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15841   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15842 
15843   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15844   ins_encode %{
15845     int icnt2 = (int)$int_cnt2$$constant;
15846     if (icnt2 >= 16) {
15847       // IndexOf for constant substrings with size >= 16 elements
15848       // which don't need to be loaded through the stack.
15849       __ string_indexofC8($str1$$Register, $str2$$Register,
15850                           $cnt1$$Register, $cnt2$$Register,
15851                           icnt2, $result$$Register,
15852                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15853     } else {
15854       // Small strings are loaded through the stack if they cross a page boundary.
15855       __ string_indexof($str1$$Register, $str2$$Register,
15856                         $cnt1$$Register, $cnt2$$Register,
15857                         icnt2, $result$$Register,
15858                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15859     }
15860   %}
15861   ins_pipe( pipe_slow );
15862 %}
15863 
15864 // fast search of substring with known size.
15865 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15866                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15867 %{
15868   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15869   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15870   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15871 
15872   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15873   ins_encode %{
15874     int icnt2 = (int)$int_cnt2$$constant;
15875     if (icnt2 >= 8) {
15876       // IndexOf for constant substrings with size >= 8 elements
15877       // which don't need to be loaded through the stack.
15878       __ string_indexofC8($str1$$Register, $str2$$Register,
15879                           $cnt1$$Register, $cnt2$$Register,
15880                           icnt2, $result$$Register,
15881                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15882     } else {
15883       // Small strings are loaded through the stack if they cross a page boundary.
15884       __ string_indexof($str1$$Register, $str2$$Register,
15885                         $cnt1$$Register, $cnt2$$Register,
15886                         icnt2, $result$$Register,
15887                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15888     }
15889   %}
15890   ins_pipe( pipe_slow );
15891 %}
15892 
15893 // fast search of substring with known size.
15894 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15895                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15896 %{
15897   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15898   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15899   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15900 
15901   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15902   ins_encode %{
15903     int icnt2 = (int)$int_cnt2$$constant;
15904     if (icnt2 >= 8) {
15905       // IndexOf for constant substrings with size >= 8 elements
15906       // which don't need to be loaded through the stack.
15907       __ string_indexofC8($str1$$Register, $str2$$Register,
15908                           $cnt1$$Register, $cnt2$$Register,
15909                           icnt2, $result$$Register,
15910                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15911     } else {
15912       // Small strings are loaded through the stack if they cross a page boundary.
15913       __ string_indexof($str1$$Register, $str2$$Register,
15914                         $cnt1$$Register, $cnt2$$Register,
15915                         icnt2, $result$$Register,
15916                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15917     }
15918   %}
15919   ins_pipe( pipe_slow );
15920 %}
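
// In the constant-count variants above, string_indexofC8() is only used once the
// constant needle reaches 16 elements (LL) or 8 elements (UU/UL); shorter needles
// fall back to string_indexof(), which may stage the needle through the stack when
// it crosses a page boundary. The thresholds line up with one 128-bit register of
// needle data, an inference from the UseSSE42Intrinsics predicate rather than
// anything stated here.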
15921 
15922 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15923                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15924 %{
15925   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15926   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15927   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15928 
15929   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15930   ins_encode %{
15931     __ string_indexof($str1$$Register, $str2$$Register,
15932                       $cnt1$$Register, $cnt2$$Register,
15933                       (-1), $result$$Register,
15934                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15935   %}
15936   ins_pipe( pipe_slow );
15937 %}
15938 
15939 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15940                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15941 %{
15942   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15943   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15944   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15945 
15946   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15947   ins_encode %{
15948     __ string_indexof($str1$$Register, $str2$$Register,
15949                       $cnt1$$Register, $cnt2$$Register,
15950                       (-1), $result$$Register,
15951                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15952   %}
15953   ins_pipe( pipe_slow );
15954 %}
15955 
15956 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15957                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15958 %{
15959   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15960   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15961   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15962 
15963   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15964   ins_encode %{
15965     __ string_indexof($str1$$Register, $str2$$Register,
15966                       $cnt1$$Register, $cnt2$$Register,
15967                       (-1), $result$$Register,
15968                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15969   %}
15970   ins_pipe( pipe_slow );
15971 %}
15972 
15973 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15974                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15975 %{
15976   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15977   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15978   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15979   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15980   ins_encode %{
15981     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15982                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15983   %}
15984   ins_pipe( pipe_slow );
15985 %}
15986 
15987 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15988                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15989 %{
15990   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15991   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15992   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15993   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15994   ins_encode %{
15995     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15996                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15997   %}
15998   ins_pipe( pipe_slow );
15999 %}
16000 
16001 // fast string equals
16002 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16003                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16004 %{
16005   predicate(!VM_Version::supports_avx512vlbw());
16006   match(Set result (StrEquals (Binary str1 str2) cnt));
16007   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16008 
16009   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16010   ins_encode %{
16011     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16012                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16013                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16014   %}
16015   ins_pipe( pipe_slow );
16016 %}
16017 
16018 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16019                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
16020 %{
16021   predicate(VM_Version::supports_avx512vlbw());
16022   match(Set result (StrEquals (Binary str1 str2) cnt));
16023   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16024 
16025   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16026   ins_encode %{
16027     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16028                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16029                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16030   %}
16031   ins_pipe( pipe_slow );
16032 %}
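
// Pattern for the intrinsics in this section: most come in pairs, a baseline
// version that passes knoreg and an _evex twin predicated on
// VM_Version::supports_avx512vlbw() (sometimes also supports_bmi2()) that reserves
// kReg temporaries and hands real mask registers to the same MacroAssembler
// routine.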
16033 
16034 // fast array equals
16035 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16036                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16037 %{
16038   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16039   match(Set result (AryEq ary1 ary2));
16040   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16041 
16042   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16043   ins_encode %{
16044     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16045                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16046                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16047   %}
16048   ins_pipe( pipe_slow );
16049 %}
16050 
16051 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16052                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16053 %{
16054   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16055   match(Set result (AryEq ary1 ary2));
16056   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16057 
16058   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16059   ins_encode %{
16060     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16061                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16062                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16063   %}
16064   ins_pipe( pipe_slow );
16065 %}
16066 
16067 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16068                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16069 %{
16070   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16071   match(Set result (AryEq ary1 ary2));
16072   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16073 
16074   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16075   ins_encode %{
16076     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16077                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16078                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
16079   %}
16080   ins_pipe( pipe_slow );
16081 %}
16082 
16083 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16084                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16085 %{
16086   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16087   match(Set result (AryEq ary1 ary2));
16088   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16089 
16090   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16091   ins_encode %{
16092     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16093                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16094                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16095   %}
16096   ins_pipe( pipe_slow );
16097 %}
16098 
16099 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16100                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16101                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16102                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16103                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16104 %{
16105   predicate(UseAVX >= 2);
16106   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16107   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16108          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16109          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16110          USE basic_type, KILL cr);
16111 
16112   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
16113   ins_encode %{
16114     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16115                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16116                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16117                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16118                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16119                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16120                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16121   %}
16122   ins_pipe( pipe_slow );
16123 %}
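
// VectorizedHashCode computes the usual polynomial array hash, with the element
// width selected by the $basic_type constant; the scalar equivalent is roughly
//
//   for (int i = 0; i < cnt1; i++) result = 31 * result + ary1[i];
//
// where the incoming $result supplies the initial value.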
16124 
16125 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
16126                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16127 %{
16128   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16129   match(Set result (CountPositives ary1 len));
16130   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16131 
16132   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16133   ins_encode %{
16134     __ count_positives($ary1$$Register, $len$$Register,
16135                        $result$$Register, $tmp3$$Register,
16136                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16137   %}
16138   ins_pipe( pipe_slow );
16139 %}
16140 
16141 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
16142                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16143 %{
16144   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16145   match(Set result (CountPositives ary1 len));
16146   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16147 
16148   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16149   ins_encode %{
16150     __ count_positives($ary1$$Register, $len$$Register,
16151                        $result$$Register, $tmp3$$Register,
16152                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16153   %}
16154   ins_pipe( pipe_slow );
16155 %}
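
// count_positives() scans a byte[] and, judging by its name and the
// CountPositives node it matches, reports how many leading non-negative bytes
// precede the first byte with its sign bit set (a result equal to $len then
// means the whole array is "positive").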
16156 
16157 // fast char[] to byte[] compression
16158 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16159                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16160   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16161   match(Set result (StrCompressedCopy src (Binary dst len)));
16162   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16163          USE_KILL len, KILL tmp5, KILL cr);
16164 
16165   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16166   ins_encode %{
16167     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16168                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16169                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16170                            knoreg, knoreg);
16171   %}
16172   ins_pipe( pipe_slow );
16173 %}
16174 
16175 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16176                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16177   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16178   match(Set result (StrCompressedCopy src (Binary dst len)));
16179   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16180          USE_KILL len, KILL tmp5, KILL cr);
16181 
16182   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16183   ins_encode %{
16184     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16185                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16186                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16187                            $ktmp1$$KRegister, $ktmp2$$KRegister);
16188   %}
16189   ins_pipe( pipe_slow );
16190 %}
16191 // fast byte[] to char[] inflation
16192 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16193                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16194   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16195   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16196   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16197 
16198   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16199   ins_encode %{
16200     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16201                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16202   %}
16203   ins_pipe( pipe_slow );
16204 %}
16205 
16206 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16207                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16208   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16209   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16210   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16211 
16212   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16213   ins_encode %{
16214     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16215                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16216   %}
16217   ins_pipe( pipe_slow );
16218 %}
16219 
16220 // encode char[] to byte[] in ISO_8859_1
16221 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16222                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16223                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16224   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16225   match(Set result (EncodeISOArray src (Binary dst len)));
16226   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16227 
16228   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16229   ins_encode %{
16230     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16231                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16232                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16233   %}
16234   ins_pipe( pipe_slow );
16235 %}
16236 
16237 // encode char[] to byte[] in ASCII
16238 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16239                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16240                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16241   predicate(((EncodeISOArrayNode*)n)->is_ascii());
16242   match(Set result (EncodeISOArray src (Binary dst len)));
16243   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16244 
16245   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16246   ins_encode %{
16247     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16248                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16249                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16250   %}
16251   ins_pipe( pipe_slow );
16252 %}
16253 
16254 //----------Overflow Math Instructions-----------------------------------------
16255 
16256 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16257 %{
16258   match(Set cr (OverflowAddI op1 op2));
16259   effect(DEF cr, USE_KILL op1, USE op2);
16260 
16261   format %{ "addl    $op1, $op2\t# overflow check int" %}
16262 
16263   ins_encode %{
16264     __ addl($op1$$Register, $op2$$Register);
16265   %}
16266   ins_pipe(ialu_reg_reg);
16267 %}
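
// The Overflow* nodes only produce condition flags; a subsequent branch on the
// overflow condition consumes them, e.g. for the Math.addExact family of
// intrinsics, which deoptimize when overflow occurs. Roughly:
//
//   addl  rax, $op2      // this instruct: sets OF on signed overflow
//   jo    <uncommon trap / slow path>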
16268 
16269 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16270 %{
16271   match(Set cr (OverflowAddI op1 op2));
16272   effect(DEF cr, USE_KILL op1, USE op2);
16273 
16274   format %{ "addl    $op1, $op2\t# overflow check int" %}
16275 
16276   ins_encode %{
16277     __ addl($op1$$Register, $op2$$constant);
16278   %}
16279   ins_pipe(ialu_reg_reg);
16280 %}
16281 
16282 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16283 %{
16284   match(Set cr (OverflowAddL op1 op2));
16285   effect(DEF cr, USE_KILL op1, USE op2);
16286 
16287   format %{ "addq    $op1, $op2\t# overflow check long" %}
16288   ins_encode %{
16289     __ addq($op1$$Register, $op2$$Register);
16290   %}
16291   ins_pipe(ialu_reg_reg);
16292 %}
16293 
16294 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16295 %{
16296   match(Set cr (OverflowAddL op1 op2));
16297   effect(DEF cr, USE_KILL op1, USE op2);
16298 
16299   format %{ "addq    $op1, $op2\t# overflow check long" %}
16300   ins_encode %{
16301     __ addq($op1$$Register, $op2$$constant);
16302   %}
16303   ins_pipe(ialu_reg_reg);
16304 %}
16305 
16306 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16307 %{
16308   match(Set cr (OverflowSubI op1 op2));
16309 
16310   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16311   ins_encode %{
16312     __ cmpl($op1$$Register, $op2$$Register);
16313   %}
16314   ins_pipe(ialu_reg_reg);
16315 %}
16316 
16317 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16318 %{
16319   match(Set cr (OverflowSubI op1 op2));
16320 
16321   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16322   ins_encode %{
16323     __ cmpl($op1$$Register, $op2$$constant);
16324   %}
16325   ins_pipe(ialu_reg_reg);
16326 %}
16327 
16328 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16329 %{
16330   match(Set cr (OverflowSubL op1 op2));
16331 
16332   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16333   ins_encode %{
16334     __ cmpq($op1$$Register, $op2$$Register);
16335   %}
16336   ins_pipe(ialu_reg_reg);
16337 %}
16338 
16339 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16340 %{
16341   match(Set cr (OverflowSubL op1 op2));
16342 
16343   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16344   ins_encode %{
16345     __ cmpq($op1$$Register, $op2$$constant);
16346   %}
16347   ins_pipe(ialu_reg_reg);
16348 %}
16349 
16350 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16351 %{
16352   match(Set cr (OverflowSubI zero op2));
16353   effect(DEF cr, USE_KILL op2);
16354 
16355   format %{ "negl    $op2\t# overflow check int" %}
16356   ins_encode %{
16357     __ negl($op2$$Register);
16358   %}
16359   ins_pipe(ialu_reg_reg);
16360 %}
16361 
16362 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16363 %{
16364   match(Set cr (OverflowSubL zero op2));
16365   effect(DEF cr, USE_KILL op2);
16366 
16367   format %{ "negq    $op2\t# overflow check long" %}
16368   ins_encode %{
16369     __ negq($op2$$Register);
16370   %}
16371   ins_pipe(ialu_reg_reg);
16372 %}
16373 
16374 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16375 %{
16376   match(Set cr (OverflowMulI op1 op2));
16377   effect(DEF cr, USE_KILL op1, USE op2);
16378 
16379   format %{ "imull    $op1, $op2\t# overflow check int" %}
16380   ins_encode %{
16381     __ imull($op1$$Register, $op2$$Register);
16382   %}
16383   ins_pipe(ialu_reg_reg_alu0);
16384 %}
16385 
16386 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16387 %{
16388   match(Set cr (OverflowMulI op1 op2));
16389   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16390 
16391   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16392   ins_encode %{
16393     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16394   %}
16395   ins_pipe(ialu_reg_reg_alu0);
16396 %}
16397 
16398 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16399 %{
16400   match(Set cr (OverflowMulL op1 op2));
16401   effect(DEF cr, USE_KILL op1, USE op2);
16402 
16403   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16404   ins_encode %{
16405     __ imulq($op1$$Register, $op2$$Register);
16406   %}
16407   ins_pipe(ialu_reg_reg_alu0);
16408 %}
16409 
16410 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16411 %{
16412   match(Set cr (OverflowMulL op1 op2));
16413   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16414 
16415   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16416   ins_encode %{
16417     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16418   %}
16419   ins_pipe(ialu_reg_reg_alu0);
16420 %}
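
// The immediate forms of the overflow multiplies use the three-operand
// imul (tmp, op1, imm), writing the product into a TEMP register; that is why op1
// is only USE rather than USE_KILL there, unlike the register-register forms
// above, which multiply rax in place and must kill it.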
16421 
16422 
16423 //----------Control Flow Instructions------------------------------------------
16424 // Signed compare Instructions
16425 
16426 // XXX more variants!!
16427 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16428 %{
16429   match(Set cr (CmpI op1 op2));
16430   effect(DEF cr, USE op1, USE op2);
16431 
16432   format %{ "cmpl    $op1, $op2" %}
16433   ins_encode %{
16434     __ cmpl($op1$$Register, $op2$$Register);
16435   %}
16436   ins_pipe(ialu_cr_reg_reg);
16437 %}
16438 
16439 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16440 %{
16441   match(Set cr (CmpI op1 op2));
16442 
16443   format %{ "cmpl    $op1, $op2" %}
16444   ins_encode %{
16445     __ cmpl($op1$$Register, $op2$$constant);
16446   %}
16447   ins_pipe(ialu_cr_reg_imm);
16448 %}
16449 
16450 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16451 %{
16452   match(Set cr (CmpI op1 (LoadI op2)));
16453 
16454   ins_cost(500); // XXX
16455   format %{ "cmpl    $op1, $op2" %}
16456   ins_encode %{
16457     __ cmpl($op1$$Register, $op2$$Address);
16458   %}
16459   ins_pipe(ialu_cr_reg_mem);
16460 %}
16461 
16462 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16463 %{
16464   match(Set cr (CmpI src zero));
16465 
16466   format %{ "testl   $src, $src" %}
16467   ins_encode %{
16468     __ testl($src$$Register, $src$$Register);
16469   %}
16470   ins_pipe(ialu_cr_reg_imm);
16471 %}
16472 
16473 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16474 %{
16475   match(Set cr (CmpI (AndI src con) zero));
16476 
16477   format %{ "testl   $src, $con" %}
16478   ins_encode %{
16479     __ testl($src$$Register, $con$$constant);
16480   %}
16481   ins_pipe(ialu_cr_reg_imm);
16482 %}
16483 
16484 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16485 %{
16486   match(Set cr (CmpI (AndI src1 src2) zero));
16487 
16488   format %{ "testl   $src1, $src2" %}
16489   ins_encode %{
16490     __ testl($src1$$Register, $src2$$Register);
16491   %}
16492   ins_pipe(ialu_cr_reg_imm);
16493 %}
16494 
16495 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16496 %{
16497   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16498 
16499   format %{ "testl   $src, $mem" %}
16500   ins_encode %{
16501     __ testl($src$$Register, $mem$$Address);
16502   %}
16503   ins_pipe(ialu_cr_reg_mem);
16504 %}
16505 
16506 // Unsigned compare Instructions; really, same as signed except they
16507 // produce an rFlagsRegU instead of rFlagsReg.
16508 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16509 %{
16510   match(Set cr (CmpU op1 op2));
16511 
16512   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16513   ins_encode %{
16514     __ cmpl($op1$$Register, $op2$$Register);
16515   %}
16516   ins_pipe(ialu_cr_reg_reg);
16517 %}
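
// rFlagsRegU changes only how later consumers interpret the flags: the compare is
// the same cmpl, but branches matched against an unsigned flags register select
// the unsigned condition codes (jb/jbe/ja/jae) instead of the signed ones.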
16518 
16519 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16520 %{
16521   match(Set cr (CmpU op1 op2));
16522 
16523   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16524   ins_encode %{
16525     __ cmpl($op1$$Register, $op2$$constant);
16526   %}
16527   ins_pipe(ialu_cr_reg_imm);
16528 %}
16529 
16530 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16531 %{
16532   match(Set cr (CmpU op1 (LoadI op2)));
16533 
16534   ins_cost(500); // XXX
16535   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16536   ins_encode %{
16537     __ cmpl($op1$$Register, $op2$$Address);
16538   %}
16539   ins_pipe(ialu_cr_reg_mem);
16540 %}
16541 
16542 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16543 %{
16544   match(Set cr (CmpU src zero));
16545 
16546   format %{ "testl   $src, $src\t# unsigned" %}
16547   ins_encode %{
16548     __ testl($src$$Register, $src$$Register);
16549   %}
16550   ins_pipe(ialu_cr_reg_imm);
16551 %}
16552 
16553 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16554 %{
16555   match(Set cr (CmpP op1 op2));
16556 
16557   format %{ "cmpq    $op1, $op2\t# ptr" %}
16558   ins_encode %{
16559     __ cmpq($op1$$Register, $op2$$Register);
16560   %}
16561   ins_pipe(ialu_cr_reg_reg);
16562 %}
16563 
16564 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16565 %{
16566   match(Set cr (CmpP op1 (LoadP op2)));
16567   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16568 
16569   ins_cost(500); // XXX
16570   format %{ "cmpq    $op1, $op2\t# ptr" %}
16571   ins_encode %{
16572     __ cmpq($op1$$Register, $op2$$Address);
16573   %}
16574   ins_pipe(ialu_cr_reg_mem);
16575 %}
16576 
16577 // XXX this is generalized by compP_rReg_mem???
16578 // Compare raw pointer (used in out-of-heap check).
16579 // Only works because non-oop pointers must be raw pointers
16580 // and raw pointers have no anti-dependencies.
16581 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16582 %{
16583   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16584             n->in(2)->as_Load()->barrier_data() == 0);
16585   match(Set cr (CmpP op1 (LoadP op2)));
16586 
16587   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16588   ins_encode %{
16589     __ cmpq($op1$$Register, $op2$$Address);
16590   %}
16591   ins_pipe(ialu_cr_reg_mem);
16592 %}
16593 
16594 // This will generate a signed flags result. This should be OK since
16595 // any compare to a zero should be eq/neq.
16596 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16597 %{
16598   match(Set cr (CmpP src zero));
16599 
16600   format %{ "testq   $src, $src\t# ptr" %}
16601   ins_encode %{
16602     __ testq($src$$Register, $src$$Register);
16603   %}
16604   ins_pipe(ialu_cr_reg_imm);
16605 %}
16606 
16607 // This will generate a signed flags result. This should be OK since
16608 // any compare to a zero should be eq/neq.
16609 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16610 %{
16611   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16612             n->in(1)->as_Load()->barrier_data() == 0);
16613   match(Set cr (CmpP (LoadP op) zero));
16614 
16615   ins_cost(500); // XXX
16616   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16617   ins_encode %{
16618     __ testq($op$$Address, 0xFFFFFFFF);
16619   %}
16620   ins_pipe(ialu_cr_reg_imm);
16621 %}
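
// The testq above uses a 32-bit immediate that the hardware sign-extends to
// 0xffffffffffffffff, so the AND tests the full 64-bit value against zero
// without needing a scratch register, which is what the format string shows.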
16622 
16623 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16624 %{
16625   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16626             n->in(1)->as_Load()->barrier_data() == 0);
16627   match(Set cr (CmpP (LoadP mem) zero));
16628 
16629   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16630   ins_encode %{
16631     __ cmpq(r12, $mem$$Address);
16632   %}
16633   ins_pipe(ialu_cr_reg_mem);
16634 %}
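
// When the compressed-oops base is null, r12 (the heap-base register) holds zero,
// so comparing it against the loaded pointer doubles as the null check and avoids
// materializing an immediate; the same trick reappears for narrow oops in
// testN_mem_reg0 below.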
16635 
16636 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16637 %{
16638   match(Set cr (CmpN op1 op2));
16639 
16640   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16641   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16642   ins_pipe(ialu_cr_reg_reg);
16643 %}
16644 
16645 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16646 %{
16647   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16648   match(Set cr (CmpN src (LoadN mem)));
16649 
16650   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16651   ins_encode %{
16652     __ cmpl($src$$Register, $mem$$Address);
16653   %}
16654   ins_pipe(ialu_cr_reg_mem);
16655 %}
16656 
16657 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16658   match(Set cr (CmpN op1 op2));
16659 
16660   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16661   ins_encode %{
16662     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16663   %}
16664   ins_pipe(ialu_cr_reg_imm);
16665 %}
16666 
16667 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16668 %{
16669   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16670   match(Set cr (CmpN src (LoadN mem)));
16671 
16672   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16673   ins_encode %{
16674     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16675   %}
16676   ins_pipe(ialu_cr_reg_mem);
16677 %}
16678 
16679 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16680   match(Set cr (CmpN op1 op2));
16681 
16682   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16683   ins_encode %{
16684     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16685   %}
16686   ins_pipe(ialu_cr_reg_imm);
16687 %}
16688 
16689 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16690 %{
16691   predicate(!UseCompactObjectHeaders);
16692   match(Set cr (CmpN src (LoadNKlass mem)));
16693 
16694   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16695   ins_encode %{
16696     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16697   %}
16698   ins_pipe(ialu_cr_reg_mem);
16699 %}
16700 
16701 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16702   match(Set cr (CmpN src zero));
16703 
16704   format %{ "testl   $src, $src\t# compressed ptr" %}
16705   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16706   ins_pipe(ialu_cr_reg_imm);
16707 %}
16708 
16709 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16710 %{
16711   predicate(CompressedOops::base() != nullptr &&
16712             n->in(1)->as_Load()->barrier_data() == 0);
16713   match(Set cr (CmpN (LoadN mem) zero));
16714 
16715   ins_cost(500); // XXX
16716   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16717   ins_encode %{
16718     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16719   %}
16720   ins_pipe(ialu_cr_reg_mem);
16721 %}
16722 
16723 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16724 %{
16725   predicate(CompressedOops::base() == nullptr &&
16726             n->in(1)->as_Load()->barrier_data() == 0);
16727   match(Set cr (CmpN (LoadN mem) zero));
16728 
16729   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16730   ins_encode %{
16731     __ cmpl(r12, $mem$$Address);
16732   %}
16733   ins_pipe(ialu_cr_reg_mem);
16734 %}
16735 
16736 // Yanked all unsigned pointer compare operations.
16737 // Pointer compares are done with CmpP which is already unsigned.
16738 
16739 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16740 %{
16741   match(Set cr (CmpL op1 op2));
16742 
16743   format %{ "cmpq    $op1, $op2" %}
16744   ins_encode %{
16745     __ cmpq($op1$$Register, $op2$$Register);
16746   %}
16747   ins_pipe(ialu_cr_reg_reg);
16748 %}
16749 
16750 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16751 %{
16752   match(Set cr (CmpL op1 op2));
16753 
16754   format %{ "cmpq    $op1, $op2" %}
16755   ins_encode %{
16756     __ cmpq($op1$$Register, $op2$$constant);
16757   %}
16758   ins_pipe(ialu_cr_reg_imm);
16759 %}
16760 
16761 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16762 %{
16763   match(Set cr (CmpL op1 (LoadL op2)));
16764 
16765   format %{ "cmpq    $op1, $op2" %}
16766   ins_encode %{
16767     __ cmpq($op1$$Register, $op2$$Address);
16768   %}
16769   ins_pipe(ialu_cr_reg_mem);
16770 %}
16771 
16772 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16773 %{
16774   match(Set cr (CmpL src zero));
16775 
16776   format %{ "testq   $src, $src" %}
16777   ins_encode %{
16778     __ testq($src$$Register, $src$$Register);
16779   %}
16780   ins_pipe(ialu_cr_reg_imm);
16781 %}
16782 
16783 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16784 %{
16785   match(Set cr (CmpL (AndL src con) zero));
16786 
16787   format %{ "testq   $src, $con\t# long" %}
16788   ins_encode %{
16789     __ testq($src$$Register, $con$$constant);
16790   %}
16791   ins_pipe(ialu_cr_reg_imm);
16792 %}
16793 
16794 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16795 %{
16796   match(Set cr (CmpL (AndL src1 src2) zero));
16797 
16798   format %{ "testq   $src1, $src2\t# long" %}
16799   ins_encode %{
16800     __ testq($src1$$Register, $src2$$Register);
16801   %}
16802   ins_pipe(ialu_cr_reg_imm);
16803 %}
16804 
16805 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16806 %{
16807   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16808 
16809   format %{ "testq   $src, $mem" %}
16810   ins_encode %{
16811     __ testq($src$$Register, $mem$$Address);
16812   %}
16813   ins_pipe(ialu_cr_reg_mem);
16814 %}
16815 
16816 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16817 %{
16818   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16819 
16820   format %{ "testq   $src, $mem" %}
16821   ins_encode %{
16822     __ testq($src$$Register, $mem$$Address);
16823   %}
16824   ins_pipe(ialu_cr_reg_mem);
16825 %}
16826 
16827 // Manifest a CmpU result in an integer register.  Very painful.
16828 // This is the test to avoid.
16829 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16830 %{
16831   match(Set dst (CmpU3 src1 src2));
16832   effect(KILL flags);
16833 
16834   ins_cost(275); // XXX
16835   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16836             "movl    $dst, -1\n\t"
16837             "jb,u    done\n\t"
16838             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16839     "done:" %}
16840   ins_encode %{
16841     Label done;
16842     __ cmpl($src1$$Register, $src2$$Register);
16843     __ movl($dst$$Register, -1);
16844     __ jccb(Assembler::below, done);
16845     __ setcc(Assembler::notZero, $dst$$Register);
16846     __ bind(done);
16847   %}
16848   ins_pipe(pipe_slow);
16849 %}
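// A rough C sketch of what the sequence above computes (an illustration only,
// not part of the generated code):
//
//   int cmpU3(uint32_t a, uint32_t b) {
//     if (a < b)  return -1;   // movl dst, -1; jb done
//     if (a != b) return  1;   // setcc(notZero) -> 1
//     return 0;                //                -> 0
//   }
//
// The CmpL3/CmpUL3 rules below follow the same pattern with 64-bit operands
// and signed/unsigned conditional jumps respectively.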
16850 
16851 // Manifest a CmpL result in an integer register.  Very painful.
16852 // This is the test to avoid.
16853 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16854 %{
16855   match(Set dst (CmpL3 src1 src2));
16856   effect(KILL flags);
16857 
16858   ins_cost(275); // XXX
16859   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16860             "movl    $dst, -1\n\t"
16861             "jl,s    done\n\t"
16862             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16863     "done:" %}
16864   ins_encode %{
16865     Label done;
16866     __ cmpq($src1$$Register, $src2$$Register);
16867     __ movl($dst$$Register, -1);
16868     __ jccb(Assembler::less, done);
16869     __ setcc(Assembler::notZero, $dst$$Register);
16870     __ bind(done);
16871   %}
16872   ins_pipe(pipe_slow);
16873 %}
16874 
16875 // Manifest a CmpUL result in an integer register.  Very painful.
16876 // This is the test to avoid.
16877 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16878 %{
16879   match(Set dst (CmpUL3 src1 src2));
16880   effect(KILL flags);
16881 
16882   ins_cost(275); // XXX
16883   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16884             "movl    $dst, -1\n\t"
16885             "jb,u    done\n\t"
16886             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16887     "done:" %}
16888   ins_encode %{
16889     Label done;
16890     __ cmpq($src1$$Register, $src2$$Register);
16891     __ movl($dst$$Register, -1);
16892     __ jccb(Assembler::below, done);
16893     __ setcc(Assembler::notZero, $dst$$Register);
16894     __ bind(done);
16895   %}
16896   ins_pipe(pipe_slow);
16897 %}
16898 
16899 // Unsigned long compare instructions; these are the same as the signed long
16900 // compares except that they produce an rFlagsRegU instead of an rFlagsReg.
16901 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16902 %{
16903   match(Set cr (CmpUL op1 op2));
16904 
16905   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16906   ins_encode %{
16907     __ cmpq($op1$$Register, $op2$$Register);
16908   %}
16909   ins_pipe(ialu_cr_reg_reg);
16910 %}
16911 
16912 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16913 %{
16914   match(Set cr (CmpUL op1 op2));
16915 
16916   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16917   ins_encode %{
16918     __ cmpq($op1$$Register, $op2$$constant);
16919   %}
16920   ins_pipe(ialu_cr_reg_imm);
16921 %}
16922 
16923 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16924 %{
16925   match(Set cr (CmpUL op1 (LoadL op2)));
16926 
16927   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16928   ins_encode %{
16929     __ cmpq($op1$$Register, $op2$$Address);
16930   %}
16931   ins_pipe(ialu_cr_reg_mem);
16932 %}
16933 
16934 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16935 %{
16936   match(Set cr (CmpUL src zero));
16937 
16938   format %{ "testq   $src, $src\t# unsigned" %}
16939   ins_encode %{
16940     __ testq($src$$Register, $src$$Register);
16941   %}
16942   ins_pipe(ialu_cr_reg_imm);
16943 %}
16944 
16945 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16946 %{
16947   match(Set cr (CmpI (LoadB mem) imm));
16948 
16949   ins_cost(125);
16950   format %{ "cmpb    $mem, $imm" %}
16951   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16952   ins_pipe(ialu_cr_reg_mem);
16953 %}
16954 
16955 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16956 %{
16957   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16958 
16959   ins_cost(125);
16960   format %{ "testb   $mem, $imm\t# ubyte" %}
16961   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16962   ins_pipe(ialu_cr_reg_mem);
16963 %}
16964 
16965 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16966 %{
16967   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16968 
16969   ins_cost(125);
16970   format %{ "testb   $mem, $imm\t# byte" %}
16971   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16972   ins_pipe(ialu_cr_reg_mem);
16973 %}
16974 
16975 //----------Max and Min--------------------------------------------------------
16976 // Min Instructions
16977 
16978 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16979 %{
16980   predicate(!UseAPX);
16981   effect(USE_DEF dst, USE src, USE cr);
16982 
16983   format %{ "cmovlgt $dst, $src\t# min" %}
16984   ins_encode %{
16985     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16986   %}
16987   ins_pipe(pipe_cmov_reg);
16988 %}
16989 
16990 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16991 %{
16992   predicate(UseAPX);
16993   effect(DEF dst, USE src1, USE src2, USE cr);
16994 
16995   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16996   ins_encode %{
16997     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16998   %}
16999   ins_pipe(pipe_cmov_reg);
17000 %}
17001 
17002 instruct minI_rReg(rRegI dst, rRegI src)
17003 %{
17004   predicate(!UseAPX);
17005   match(Set dst (MinI dst src));
17006 
17007   ins_cost(200);
17008   expand %{
17009     rFlagsReg cr;
17010     compI_rReg(cr, dst, src);
17011     cmovI_reg_g(dst, src, cr);
17012   %}
17013 %}
17014 
17015 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17016 %{
17017   predicate(UseAPX);
17018   match(Set dst (MinI src1 src2));
17019   effect(DEF dst, USE src1, USE src2);
17020   flag(PD::Flag_ndd_demotable_opr1);
17021 
17022   ins_cost(200);
17023   expand %{
17024     rFlagsReg cr;
17025     compI_rReg(cr, src1, src2);
17026     cmovI_reg_g_ndd(dst, src1, src2, cr);
17027   %}
17028 %}
17029 
17030 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
17031 %{
17032   predicate(!UseAPX);
17033   effect(USE_DEF dst, USE src, USE cr);
17034 
17035   format %{ "cmovllt $dst, $src\t# max" %}
17036   ins_encode %{
17037     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
17038   %}
17039   ins_pipe(pipe_cmov_reg);
17040 %}
17041 
17042 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17043 %{
17044   predicate(UseAPX);
17045   effect(DEF dst, USE src1, USE src2, USE cr);
17046 
17047   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
17048   ins_encode %{
17049     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
17050   %}
17051   ins_pipe(pipe_cmov_reg);
17052 %}
17053 
17054 instruct maxI_rReg(rRegI dst, rRegI src)
17055 %{
17056   predicate(!UseAPX);
17057   match(Set dst (MaxI dst src));
17058 
17059   ins_cost(200);
17060   expand %{
17061     rFlagsReg cr;
17062     compI_rReg(cr, dst, src);
17063     cmovI_reg_l(dst, src, cr);
17064   %}
17065 %}
17066 
17067 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17068 %{
17069   predicate(UseAPX);
17070   match(Set dst (MaxI src1 src2));
17071   effect(DEF dst, USE src1, USE src2);
17072   flag(PD::Flag_ndd_demotable_opr1);
17073 
17074   ins_cost(200);
17075   expand %{
17076     rFlagsReg cr;
17077     compI_rReg(cr, src1, src2);
17078     cmovI_reg_l_ndd(dst, src1, src2, cr);
17079   %}
17080 %}
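// The Min/Max rules above expand into a compare followed by a conditional
// move rather than a branch.  A rough C-level sketch of the MinI idiom
// (illustration only, 32-bit signed operands):
//
//   int min(int dst, int src) {
//     // compI_rReg:   cmp dst, src
//     // cmovI_reg_g:  if (dst > src) dst = src;
//     return dst;
//   }
//
// MaxI is symmetric, using cmovl(less) so that dst = src when dst < src.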
17081 
17082 // ============================================================================
17083 // Branch Instructions
17084 
17085 // Jump Direct - Label defines a relative address from JMP+1
17086 instruct jmpDir(label labl)
17087 %{
17088   match(Goto);
17089   effect(USE labl);
17090 
17091   ins_cost(300);
17092   format %{ "jmp     $labl" %}
17093   size(5);
17094   ins_encode %{
17095     Label* L = $labl$$label;
17096     __ jmp(*L, false); // Always long jump
17097   %}
17098   ins_pipe(pipe_jmp);
17099 %}
17100 
17101 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17102 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17103 %{
17104   match(If cop cr);
17105   effect(USE labl);
17106 
17107   ins_cost(300);
17108   format %{ "j$cop     $labl" %}
17109   size(6);
17110   ins_encode %{
17111     Label* L = $labl$$label;
17112     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17113   %}
17114   ins_pipe(pipe_jcc);
17115 %}
17116 
17117 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17118 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17119 %{
17120   match(CountedLoopEnd cop cr);
17121   effect(USE labl);
17122 
17123   ins_cost(300);
17124   format %{ "j$cop     $labl\t# loop end" %}
17125   size(6);
17126   ins_encode %{
17127     Label* L = $labl$$label;
17128     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17129   %}
17130   ins_pipe(pipe_jcc);
17131 %}
17132 
17133 // Jump Direct Conditional - using unsigned comparison
17134 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17135   match(If cop cmp);
17136   effect(USE labl);
17137 
17138   ins_cost(300);
17139   format %{ "j$cop,u   $labl" %}
17140   size(6);
17141   ins_encode %{
17142     Label* L = $labl$$label;
17143     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17144   %}
17145   ins_pipe(pipe_jcc);
17146 %}
17147 
17148 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17149   match(If cop cmp);
17150   effect(USE labl);
17151 
17152   ins_cost(200);
17153   format %{ "j$cop,u   $labl" %}
17154   size(6);
17155   ins_encode %{
17156     Label* L = $labl$$label;
17157     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17158   %}
17159   ins_pipe(pipe_jcc);
17160 %}
17161 
17162 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17163   match(If cop cmp);
17164   effect(USE labl);
17165 
17166   ins_cost(200);
17167   format %{ $$template
17168     if ($cop$$cmpcode == Assembler::notEqual) {
17169       $$emit$$"jp,u    $labl\n\t"
17170       $$emit$$"j$cop,u   $labl"
17171     } else {
17172       $$emit$$"jp,u    done\n\t"
17173       $$emit$$"j$cop,u   $labl\n\t"
17174       $$emit$$"done:"
17175     }
17176   %}
17177   ins_encode %{
17178     Label* l = $labl$$label;
17179     if ($cop$$cmpcode == Assembler::notEqual) {
17180       __ jcc(Assembler::parity, *l, false);
17181       __ jcc(Assembler::notEqual, *l, false);
17182     } else if ($cop$$cmpcode == Assembler::equal) {
17183       Label done;
17184       __ jccb(Assembler::parity, done);
17185       __ jcc(Assembler::equal, *l, false);
17186       __ bind(done);
17187     } else {
17188        ShouldNotReachHere();
17189     }
17190   %}
17191   ins_pipe(pipe_jcc);
17192 %}
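// Note on the rule above: for floating-point compares the unordered result
// (a NaN operand) sets the parity flag.  For a not-equal branch, unordered
// must also be taken, hence the extra jp to the target; for an equal branch,
// unordered must fall through, hence the jp over the je.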
17193 
17194 // Jump Direct Conditional - using signed and unsigned comparison
17195 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17196   match(If cop cmp);
17197   effect(USE labl);
17198 
17199   ins_cost(200);
17200   format %{ "j$cop,su   $labl" %}
17201   size(6);
17202   ins_encode %{
17203     Label* L = $labl$$label;
17204     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17205   %}
17206   ins_pipe(pipe_jcc);
17207 %}
17208 
17209 // ============================================================================
17210 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
17211 // superklass array for an instance of the superklass.  Set a hidden
17212 // internal cache on a hit (cache is checked with exposed code in
17213 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
17214 // encoding ALSO sets flags.
17215 
17216 instruct partialSubtypeCheck(rdi_RegP result,
17217                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17218                              rFlagsReg cr)
17219 %{
17220   match(Set result (PartialSubtypeCheck sub super));
17221   predicate(!UseSecondarySupersTable);
17222   effect(KILL rcx, KILL cr);
17223 
17224   ins_cost(1100);  // slightly larger than the next version
17225   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17226             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17227             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17228             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17229             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
17230             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17231             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
17232     "miss:\t" %}
17233 
17234   ins_encode %{
17235     Label miss;
17236     // NB: Callers may assume that, when $result is a valid register,
17237     // check_klass_subtype_slow_path_linear sets it to a nonzero
17238     // value.
17239     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17240                                             $rcx$$Register, $result$$Register,
17241                                             nullptr, &miss,
17242                                             /*set_cond_codes:*/ true);
17243     __ xorptr($result$$Register, $result$$Register);
17244     __ bind(miss);
17245   %}
17246 
17247   ins_pipe(pipe_slow);
17248 %}
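// A rough sketch of the slow-path scan performed above (illustration only;
// accessor names are simplified):
//
//   int partial_subtype_check(Klass* sub, Klass* super) {
//     Array<Klass*>* secondaries = sub->secondary_supers();
//     for (int i = 0; i < secondaries->length(); i++) {
//       if (secondaries->at(i) == super) {
//         sub->set_secondary_super_cache(super);  // remember the hit
//         return 0;                               // hit: result is zero
//       }
//     }
//     return 1;                                   // miss: any non-zero value
//   }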
17249 
17250 // ============================================================================
17251 // Two versions of hashtable-based partialSubtypeCheck, both used when
17252 // we need to search for a super class in the secondary supers array.
17253 // The first is used when we don't know _a priori_ the class being
17254 // searched for. The second, far more common, is used when we do know:
17255 // this is used for instanceof, checkcast, and any case where C2 can
17256 // determine it by constant propagation.
17257 
17258 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17259                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17260                                        rFlagsReg cr)
17261 %{
17262   match(Set result (PartialSubtypeCheck sub super));
17263   predicate(UseSecondarySupersTable);
17264   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17265 
17266   ins_cost(1000);
17267   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17268 
17269   ins_encode %{
17270     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17271                                          $temp3$$Register, $temp4$$Register, $result$$Register);
17272   %}
17273 
17274   ins_pipe(pipe_slow);
17275 %}
17276 
17277 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17278                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17279                                        rFlagsReg cr)
17280 %{
17281   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17282   predicate(UseSecondarySupersTable);
17283   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17284 
17285   ins_cost(700);  // smaller than the next version
17286   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17287 
17288   ins_encode %{
17289     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17290     if (InlineSecondarySupersTest) {
17291       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17292                                        $temp3$$Register, $temp4$$Register, $result$$Register,
17293                                        super_klass_slot);
17294     } else {
17295       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17296     }
17297   %}
17298 
17299   ins_pipe(pipe_slow);
17300 %}
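// Note: in the constant-super rule above the hash slot of the superclass is
// known at compile time, so the check is either inlined
// (InlineSecondarySupersTest) or dispatched to a per-slot stub obtained from
// StubRoutines::lookup_secondary_supers_table_stub(slot).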
17301 
17302 // ============================================================================
17303 // Branch Instructions -- short offset versions
17304 //
17305 // These instructions are used to replace jumps of a long offset (the default
17306 // match) with jumps of a shorter offset.  These instructions are all tagged
17307 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17308 // match rules in general matching.  Instead, the ADLC generates a conversion
17309 // method in the MachNode which can be used to do in-place replacement of the
17310 // long variant with the shorter variant.  The compiler will determine if a
17311 // branch can be taken by the is_short_branch_offset() predicate in the machine
17312 // specific code section of the file.
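// For example, jmpDir above is emitted as a 5-byte jmp rel32, while
// jmpDir_short below is a 2-byte jmp rel8; the conditional forms shrink from
// 6 bytes (0x0F 0x8x rel32) to 2 bytes (0x7x rel8).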
17313 
17314 // Jump Direct - Label defines a relative address from JMP+1
17315 instruct jmpDir_short(label labl) %{
17316   match(Goto);
17317   effect(USE labl);
17318 
17319   ins_cost(300);
17320   format %{ "jmp,s   $labl" %}
17321   size(2);
17322   ins_encode %{
17323     Label* L = $labl$$label;
17324     __ jmpb(*L);
17325   %}
17326   ins_pipe(pipe_jmp);
17327   ins_short_branch(1);
17328 %}
17329 
17330 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17331 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17332   match(If cop cr);
17333   effect(USE labl);
17334 
17335   ins_cost(300);
17336   format %{ "j$cop,s   $labl" %}
17337   size(2);
17338   ins_encode %{
17339     Label* L = $labl$$label;
17340     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17341   %}
17342   ins_pipe(pipe_jcc);
17343   ins_short_branch(1);
17344 %}
17345 
17346 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17347 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17348   match(CountedLoopEnd cop cr);
17349   effect(USE labl);
17350 
17351   ins_cost(300);
17352   format %{ "j$cop,s   $labl\t# loop end" %}
17353   size(2);
17354   ins_encode %{
17355     Label* L = $labl$$label;
17356     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17357   %}
17358   ins_pipe(pipe_jcc);
17359   ins_short_branch(1);
17360 %}
17361 
17362 // Jump Direct Conditional - using unsigned comparison
17363 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17364   match(If cop cmp);
17365   effect(USE labl);
17366 
17367   ins_cost(300);
17368   format %{ "j$cop,us  $labl" %}
17369   size(2);
17370   ins_encode %{
17371     Label* L = $labl$$label;
17372     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17373   %}
17374   ins_pipe(pipe_jcc);
17375   ins_short_branch(1);
17376 %}
17377 
17378 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17379   match(If cop cmp);
17380   effect(USE labl);
17381 
17382   ins_cost(300);
17383   format %{ "j$cop,us  $labl" %}
17384   size(2);
17385   ins_encode %{
17386     Label* L = $labl$$label;
17387     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17388   %}
17389   ins_pipe(pipe_jcc);
17390   ins_short_branch(1);
17391 %}
17392 
17393 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17394   match(If cop cmp);
17395   effect(USE labl);
17396 
17397   ins_cost(300);
17398   format %{ $$template
17399     if ($cop$$cmpcode == Assembler::notEqual) {
17400       $$emit$$"jp,u,s  $labl\n\t"
17401       $$emit$$"j$cop,u,s  $labl"
17402     } else {
17403       $$emit$$"jp,u,s  done\n\t"
17404       $$emit$$"j$cop,u,s  $labl\n\t"
17405       $$emit$$"done:"
17406     }
17407   %}
17408   size(4);
17409   ins_encode %{
17410     Label* l = $labl$$label;
17411     if ($cop$$cmpcode == Assembler::notEqual) {
17412       __ jccb(Assembler::parity, *l);
17413       __ jccb(Assembler::notEqual, *l);
17414     } else if ($cop$$cmpcode == Assembler::equal) {
17415       Label done;
17416       __ jccb(Assembler::parity, done);
17417       __ jccb(Assembler::equal, *l);
17418       __ bind(done);
17419     } else {
17420        ShouldNotReachHere();
17421     }
17422   %}
17423   ins_pipe(pipe_jcc);
17424   ins_short_branch(1);
17425 %}
17426 
17427 // Jump Direct Conditional - using signed and unsigned comparison
17428 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17429   match(If cop cmp);
17430   effect(USE labl);
17431 
17432   ins_cost(300);
17433   format %{ "j$cop,sus  $labl" %}
17434   size(2);
17435   ins_encode %{
17436     Label* L = $labl$$label;
17437     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17438   %}
17439   ins_pipe(pipe_jcc);
17440   ins_short_branch(1);
17441 %}
17442 
17443 // ============================================================================
17444 // inlined locking and unlocking
17445 
17446 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17447   match(Set cr (FastLock object box));
17448   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17449   ins_cost(300);
17450   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17451   ins_encode %{
17452     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17453   %}
17454   ins_pipe(pipe_slow);
17455 %}
17456 
17457 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17458   match(Set cr (FastUnlock object rax_reg));
17459   effect(TEMP tmp, USE_KILL rax_reg);
17460   ins_cost(300);
17461   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17462   ins_encode %{
17463     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17464   %}
17465   ins_pipe(pipe_slow);
17466 %}
17467 
17468 
17469 // ============================================================================
17470 // Safepoint Instructions
17471 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17472 %{
17473   match(SafePoint poll);
17474   effect(KILL cr, USE poll);
17475 
17476   format %{ "testl   rax, [$poll]\t"
17477             "# Safepoint: poll for GC" %}
17478   ins_cost(125);
17479   ins_encode %{
17480     __ relocate(relocInfo::poll_type);
17481     address pre_pc = __ pc();
17482     __ testl(rax, Address($poll$$Register, 0));
17483     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17484   %}
17485   ins_pipe(ialu_reg_mem);
17486 %}
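// Note: the poll above reads a word through the thread-local polling page.
// When a safepoint is requested the VM arms that page (makes it unreadable),
// so the load faults and the signal handler diverts the thread to the
// safepoint handler; the poll_type relocation lets the runtime recognize the
// instruction as a safepoint poll.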
17487 
17488 instruct mask_all_evexL(kReg dst, rRegL src) %{
17489   match(Set dst (MaskAll src));
17490   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17491   ins_encode %{
17492     int mask_len = Matcher::vector_length(this);
17493     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17494   %}
17495   ins_pipe( pipe_slow );
17496 %}
17497 
17498 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17499   predicate(Matcher::vector_length(n) > 32);
17500   match(Set dst (MaskAll src));
17501   effect(TEMP tmp);
17502   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17503   ins_encode %{
17504     int mask_len = Matcher::vector_length(this);
17505     __ movslq($tmp$$Register, $src$$Register);
17506     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17507   %}
17508   ins_pipe( pipe_slow );
17509 %}
17510 
17511 // ============================================================================
17512 // Procedure Call/Return Instructions
17513 // Call Java Static Instruction
17514 // Note: If this code changes, the corresponding ret_addr_offset() and
17515 //       compute_padding() functions will have to be adjusted.
17516 instruct CallStaticJavaDirect(method meth) %{
17517   match(CallStaticJava);
17518   effect(USE meth);
17519 
17520   ins_cost(300);
17521   format %{ "call,static " %}
17522   opcode(0xE8); /* E8 cd */
17523   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17524   ins_pipe(pipe_slow);
17525   ins_alignment(4);
17526 %}
17527 
17528 // Call Java Dynamic Instruction
17529 // Note: If this code changes, the corresponding ret_addr_offset() and
17530 //       compute_padding() functions will have to be adjusted.
17531 instruct CallDynamicJavaDirect(method meth)
17532 %{
17533   match(CallDynamicJava);
17534   effect(USE meth);
17535 
17536   ins_cost(300);
17537   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17538             "call,dynamic " %}
17539   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17540   ins_pipe(pipe_slow);
17541   ins_alignment(4);
17542 %}
17543 
17544 // Call Runtime Instruction
17545 instruct CallRuntimeDirect(method meth)
17546 %{
17547   match(CallRuntime);
17548   effect(USE meth);
17549 
17550   ins_cost(300);
17551   format %{ "call,runtime " %}
17552   ins_encode(clear_avx, Java_To_Runtime(meth));
17553   ins_pipe(pipe_slow);
17554 %}
17555 
17556 // Call runtime without safepoint
17557 instruct CallLeafDirect(method meth)
17558 %{
17559   match(CallLeaf);
17560   effect(USE meth);
17561 
17562   ins_cost(300);
17563   format %{ "call_leaf,runtime " %}
17564   ins_encode(clear_avx, Java_To_Runtime(meth));
17565   ins_pipe(pipe_slow);
17566 %}
17567 
17568 // Call runtime without safepoint and with vector arguments
17569 instruct CallLeafDirectVector(method meth)
17570 %{
17571   match(CallLeafVector);
17572   effect(USE meth);
17573 
17574   ins_cost(300);
17575   format %{ "call_leaf,vector " %}
17576   ins_encode(Java_To_Runtime(meth));
17577   ins_pipe(pipe_slow);
17578 %}
17579 
17580 // Call runtime without safepoint
17581 // entry point is null, target holds the address to call
17582 instruct CallLeafNoFPInDirect(rRegP target)
17583 %{
17584   predicate(n->as_Call()->entry_point() == nullptr);
17585   match(CallLeafNoFP target);
17586 
17587   ins_cost(300);
17588   format %{ "call_leaf_nofp,runtime indirect " %}
17589   ins_encode %{
17590      __ call($target$$Register);
17591   %}
17592 
17593   ins_pipe(pipe_slow);
17594 %}
17595 
17596 // Call runtime without safepoint
17597 instruct CallLeafNoFPDirect(method meth)
17598 %{
17599   predicate(n->as_Call()->entry_point() != nullptr);
17600   match(CallLeafNoFP);
17601   effect(USE meth);
17602 
17603   ins_cost(300);
17604   format %{ "call_leaf_nofp,runtime " %}
17605   ins_encode(clear_avx, Java_To_Runtime(meth));
17606   ins_pipe(pipe_slow);
17607 %}
17608 
17609 // Return Instruction
17610 // Remove the return address & jump to it.
17611 // Notice: We always emit a nop after a ret to make sure there is room
17612 // for safepoint patching
17613 instruct Ret()
17614 %{
17615   match(Return);
17616 
17617   format %{ "ret" %}
17618   ins_encode %{
17619     __ ret(0);
17620   %}
17621   ins_pipe(pipe_jmp);
17622 %}
17623 
17624 // Tail Call; Jump from runtime stub to Java code.
17625 // Also known as an 'interprocedural jump'.
17626 // Target of jump will eventually return to caller.
17627 // TailJump below removes the return address.
17628 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17629 // emitted just above the TailCall which has reset rbp to the caller state.
17630 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17631 %{
17632   match(TailCall jump_target method_ptr);
17633 
17634   ins_cost(300);
17635   format %{ "jmp     $jump_target\t# rbx holds method" %}
17636   ins_encode %{
17637     __ jmp($jump_target$$Register);
17638   %}
17639   ins_pipe(pipe_jmp);
17640 %}
17641 
17642 // Tail Jump; remove the return address; jump to target.
17643 // TailCall above leaves the return address around.
17644 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17645 %{
17646   match(TailJump jump_target ex_oop);
17647 
17648   ins_cost(300);
17649   format %{ "popq    rdx\t# pop return address\n\t"
17650             "jmp     $jump_target" %}
17651   ins_encode %{
17652     __ popq(as_Register(RDX_enc));
17653     __ jmp($jump_target$$Register);
17654   %}
17655   ins_pipe(pipe_jmp);
17656 %}
17657 
17658 // Forward exception.
17659 instruct ForwardExceptionjmp()
17660 %{
17661   match(ForwardException);
17662 
17663   format %{ "jmp     forward_exception_stub" %}
17664   ins_encode %{
17665     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17666   %}
17667   ins_pipe(pipe_jmp);
17668 %}
17669 
17670 // Create exception oop: created by stack-crawling runtime code.
17671 // Created exception is now available to this handler, and is setup
17672 // just prior to jumping to this handler.  No code emitted.
17673 instruct CreateException(rax_RegP ex_oop)
17674 %{
17675   match(Set ex_oop (CreateEx));
17676 
17677   size(0);
17678   // use the following format syntax
17679   format %{ "# exception oop is in rax; no code emitted" %}
17680   ins_encode();
17681   ins_pipe(empty);
17682 %}
17683 
17684 // Rethrow exception:
17685 // The exception oop will come in the first argument position.
17686 // Then JUMP (not call) to the rethrow stub code.
17687 instruct RethrowException()
17688 %{
17689   match(Rethrow);
17690 
17691   // use the following format syntax
17692   format %{ "jmp     rethrow_stub" %}
17693   ins_encode %{
17694     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17695   %}
17696   ins_pipe(pipe_jmp);
17697 %}
17698 
17699 // ============================================================================
17700 // This name is KNOWN by the ADLC and cannot be changed.
17701 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17702 // for this guy.
17703 instruct tlsLoadP(r15_RegP dst) %{
17704   match(Set dst (ThreadLocal));
17705   effect(DEF dst);
17706 
17707   size(0);
17708   format %{ "# TLS is in R15" %}
17709   ins_encode( /*empty encoding*/ );
17710   ins_pipe(ialu_reg_reg);
17711 %}
17712 
17713 instruct addF_reg(regF dst, regF src) %{
17714   predicate(UseAVX == 0);
17715   match(Set dst (AddF dst src));
17716 
17717   format %{ "addss   $dst, $src" %}
17718   ins_cost(150);
17719   ins_encode %{
17720     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17721   %}
17722   ins_pipe(pipe_slow);
17723 %}
17724 
17725 instruct addF_mem(regF dst, memory src) %{
17726   predicate(UseAVX == 0);
17727   match(Set dst (AddF dst (LoadF src)));
17728 
17729   format %{ "addss   $dst, $src" %}
17730   ins_cost(150);
17731   ins_encode %{
17732     __ addss($dst$$XMMRegister, $src$$Address);
17733   %}
17734   ins_pipe(pipe_slow);
17735 %}
17736 
17737 instruct addF_imm(regF dst, immF con) %{
17738   predicate(UseAVX == 0);
17739   match(Set dst (AddF dst con));
17740   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17741   ins_cost(150);
17742   ins_encode %{
17743     __ addss($dst$$XMMRegister, $constantaddress($con));
17744   %}
17745   ins_pipe(pipe_slow);
17746 %}
17747 
17748 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17749   predicate(UseAVX > 0);
17750   match(Set dst (AddF src1 src2));
17751 
17752   format %{ "vaddss  $dst, $src1, $src2" %}
17753   ins_cost(150);
17754   ins_encode %{
17755     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17756   %}
17757   ins_pipe(pipe_slow);
17758 %}
17759 
17760 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17761   predicate(UseAVX > 0);
17762   match(Set dst (AddF src1 (LoadF src2)));
17763 
17764   format %{ "vaddss  $dst, $src1, $src2" %}
17765   ins_cost(150);
17766   ins_encode %{
17767     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17768   %}
17769   ins_pipe(pipe_slow);
17770 %}
17771 
17772 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17773   predicate(UseAVX > 0);
17774   match(Set dst (AddF src con));
17775 
17776   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17777   ins_cost(150);
17778   ins_encode %{
17779     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17780   %}
17781   ins_pipe(pipe_slow);
17782 %}
17783 
17784 instruct addD_reg(regD dst, regD src) %{
17785   predicate(UseAVX == 0);
17786   match(Set dst (AddD dst src));
17787 
17788   format %{ "addsd   $dst, $src" %}
17789   ins_cost(150);
17790   ins_encode %{
17791     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17792   %}
17793   ins_pipe(pipe_slow);
17794 %}
17795 
17796 instruct addD_mem(regD dst, memory src) %{
17797   predicate(UseAVX == 0);
17798   match(Set dst (AddD dst (LoadD src)));
17799 
17800   format %{ "addsd   $dst, $src" %}
17801   ins_cost(150);
17802   ins_encode %{
17803     __ addsd($dst$$XMMRegister, $src$$Address);
17804   %}
17805   ins_pipe(pipe_slow);
17806 %}
17807 
17808 instruct addD_imm(regD dst, immD con) %{
17809   predicate(UseAVX == 0);
17810   match(Set dst (AddD dst con));
17811   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17812   ins_cost(150);
17813   ins_encode %{
17814     __ addsd($dst$$XMMRegister, $constantaddress($con));
17815   %}
17816   ins_pipe(pipe_slow);
17817 %}
17818 
17819 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17820   predicate(UseAVX > 0);
17821   match(Set dst (AddD src1 src2));
17822 
17823   format %{ "vaddsd  $dst, $src1, $src2" %}
17824   ins_cost(150);
17825   ins_encode %{
17826     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17827   %}
17828   ins_pipe(pipe_slow);
17829 %}
17830 
17831 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17832   predicate(UseAVX > 0);
17833   match(Set dst (AddD src1 (LoadD src2)));
17834 
17835   format %{ "vaddsd  $dst, $src1, $src2" %}
17836   ins_cost(150);
17837   ins_encode %{
17838     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17839   %}
17840   ins_pipe(pipe_slow);
17841 %}
17842 
17843 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17844   predicate(UseAVX > 0);
17845   match(Set dst (AddD src con));
17846 
17847   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17848   ins_cost(150);
17849   ins_encode %{
17850     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17851   %}
17852   ins_pipe(pipe_slow);
17853 %}
17854 
17855 instruct subF_reg(regF dst, regF src) %{
17856   predicate(UseAVX == 0);
17857   match(Set dst (SubF dst src));
17858 
17859   format %{ "subss   $dst, $src" %}
17860   ins_cost(150);
17861   ins_encode %{
17862     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17863   %}
17864   ins_pipe(pipe_slow);
17865 %}
17866 
17867 instruct subF_mem(regF dst, memory src) %{
17868   predicate(UseAVX == 0);
17869   match(Set dst (SubF dst (LoadF src)));
17870 
17871   format %{ "subss   $dst, $src" %}
17872   ins_cost(150);
17873   ins_encode %{
17874     __ subss($dst$$XMMRegister, $src$$Address);
17875   %}
17876   ins_pipe(pipe_slow);
17877 %}
17878 
17879 instruct subF_imm(regF dst, immF con) %{
17880   predicate(UseAVX == 0);
17881   match(Set dst (SubF dst con));
17882   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17883   ins_cost(150);
17884   ins_encode %{
17885     __ subss($dst$$XMMRegister, $constantaddress($con));
17886   %}
17887   ins_pipe(pipe_slow);
17888 %}
17889 
17890 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17891   predicate(UseAVX > 0);
17892   match(Set dst (SubF src1 src2));
17893 
17894   format %{ "vsubss  $dst, $src1, $src2" %}
17895   ins_cost(150);
17896   ins_encode %{
17897     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17898   %}
17899   ins_pipe(pipe_slow);
17900 %}
17901 
17902 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17903   predicate(UseAVX > 0);
17904   match(Set dst (SubF src1 (LoadF src2)));
17905 
17906   format %{ "vsubss  $dst, $src1, $src2" %}
17907   ins_cost(150);
17908   ins_encode %{
17909     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17910   %}
17911   ins_pipe(pipe_slow);
17912 %}
17913 
17914 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17915   predicate(UseAVX > 0);
17916   match(Set dst (SubF src con));
17917 
17918   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17919   ins_cost(150);
17920   ins_encode %{
17921     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17922   %}
17923   ins_pipe(pipe_slow);
17924 %}
17925 
17926 instruct subD_reg(regD dst, regD src) %{
17927   predicate(UseAVX == 0);
17928   match(Set dst (SubD dst src));
17929 
17930   format %{ "subsd   $dst, $src" %}
17931   ins_cost(150);
17932   ins_encode %{
17933     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17934   %}
17935   ins_pipe(pipe_slow);
17936 %}
17937 
17938 instruct subD_mem(regD dst, memory src) %{
17939   predicate(UseAVX == 0);
17940   match(Set dst (SubD dst (LoadD src)));
17941 
17942   format %{ "subsd   $dst, $src" %}
17943   ins_cost(150);
17944   ins_encode %{
17945     __ subsd($dst$$XMMRegister, $src$$Address);
17946   %}
17947   ins_pipe(pipe_slow);
17948 %}
17949 
17950 instruct subD_imm(regD dst, immD con) %{
17951   predicate(UseAVX == 0);
17952   match(Set dst (SubD dst con));
17953   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17954   ins_cost(150);
17955   ins_encode %{
17956     __ subsd($dst$$XMMRegister, $constantaddress($con));
17957   %}
17958   ins_pipe(pipe_slow);
17959 %}
17960 
17961 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17962   predicate(UseAVX > 0);
17963   match(Set dst (SubD src1 src2));
17964 
17965   format %{ "vsubsd  $dst, $src1, $src2" %}
17966   ins_cost(150);
17967   ins_encode %{
17968     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17969   %}
17970   ins_pipe(pipe_slow);
17971 %}
17972 
17973 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17974   predicate(UseAVX > 0);
17975   match(Set dst (SubD src1 (LoadD src2)));
17976 
17977   format %{ "vsubsd  $dst, $src1, $src2" %}
17978   ins_cost(150);
17979   ins_encode %{
17980     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17981   %}
17982   ins_pipe(pipe_slow);
17983 %}
17984 
17985 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17986   predicate(UseAVX > 0);
17987   match(Set dst (SubD src con));
17988 
17989   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17990   ins_cost(150);
17991   ins_encode %{
17992     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17993   %}
17994   ins_pipe(pipe_slow);
17995 %}
17996 
17997 instruct mulF_reg(regF dst, regF src) %{
17998   predicate(UseAVX == 0);
17999   match(Set dst (MulF dst src));
18000 
18001   format %{ "mulss   $dst, $src" %}
18002   ins_cost(150);
18003   ins_encode %{
18004     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
18005   %}
18006   ins_pipe(pipe_slow);
18007 %}
18008 
18009 instruct mulF_mem(regF dst, memory src) %{
18010   predicate(UseAVX == 0);
18011   match(Set dst (MulF dst (LoadF src)));
18012 
18013   format %{ "mulss   $dst, $src" %}
18014   ins_cost(150);
18015   ins_encode %{
18016     __ mulss($dst$$XMMRegister, $src$$Address);
18017   %}
18018   ins_pipe(pipe_slow);
18019 %}
18020 
18021 instruct mulF_imm(regF dst, immF con) %{
18022   predicate(UseAVX == 0);
18023   match(Set dst (MulF dst con));
18024   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18025   ins_cost(150);
18026   ins_encode %{
18027     __ mulss($dst$$XMMRegister, $constantaddress($con));
18028   %}
18029   ins_pipe(pipe_slow);
18030 %}
18031 
18032 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
18033   predicate(UseAVX > 0);
18034   match(Set dst (MulF src1 src2));
18035 
18036   format %{ "vmulss  $dst, $src1, $src2" %}
18037   ins_cost(150);
18038   ins_encode %{
18039     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18040   %}
18041   ins_pipe(pipe_slow);
18042 %}
18043 
18044 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
18045   predicate(UseAVX > 0);
18046   match(Set dst (MulF src1 (LoadF src2)));
18047 
18048   format %{ "vmulss  $dst, $src1, $src2" %}
18049   ins_cost(150);
18050   ins_encode %{
18051     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18052   %}
18053   ins_pipe(pipe_slow);
18054 %}
18055 
18056 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
18057   predicate(UseAVX > 0);
18058   match(Set dst (MulF src con));
18059 
18060   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18061   ins_cost(150);
18062   ins_encode %{
18063     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18064   %}
18065   ins_pipe(pipe_slow);
18066 %}
18067 
18068 instruct mulD_reg(regD dst, regD src) %{
18069   predicate(UseAVX == 0);
18070   match(Set dst (MulD dst src));
18071 
18072   format %{ "mulsd   $dst, $src" %}
18073   ins_cost(150);
18074   ins_encode %{
18075     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
18076   %}
18077   ins_pipe(pipe_slow);
18078 %}
18079 
18080 instruct mulD_mem(regD dst, memory src) %{
18081   predicate(UseAVX == 0);
18082   match(Set dst (MulD dst (LoadD src)));
18083 
18084   format %{ "mulsd   $dst, $src" %}
18085   ins_cost(150);
18086   ins_encode %{
18087     __ mulsd($dst$$XMMRegister, $src$$Address);
18088   %}
18089   ins_pipe(pipe_slow);
18090 %}
18091 
18092 instruct mulD_imm(regD dst, immD con) %{
18093   predicate(UseAVX == 0);
18094   match(Set dst (MulD dst con));
18095   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18096   ins_cost(150);
18097   ins_encode %{
18098     __ mulsd($dst$$XMMRegister, $constantaddress($con));
18099   %}
18100   ins_pipe(pipe_slow);
18101 %}
18102 
18103 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
18104   predicate(UseAVX > 0);
18105   match(Set dst (MulD src1 src2));
18106 
18107   format %{ "vmulsd  $dst, $src1, $src2" %}
18108   ins_cost(150);
18109   ins_encode %{
18110     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18111   %}
18112   ins_pipe(pipe_slow);
18113 %}
18114 
18115 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18116   predicate(UseAVX > 0);
18117   match(Set dst (MulD src1 (LoadD src2)));
18118 
18119   format %{ "vmulsd  $dst, $src1, $src2" %}
18120   ins_cost(150);
18121   ins_encode %{
18122     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18123   %}
18124   ins_pipe(pipe_slow);
18125 %}
18126 
18127 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18128   predicate(UseAVX > 0);
18129   match(Set dst (MulD src con));
18130 
18131   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18132   ins_cost(150);
18133   ins_encode %{
18134     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18135   %}
18136   ins_pipe(pipe_slow);
18137 %}
18138 
18139 instruct divF_reg(regF dst, regF src) %{
18140   predicate(UseAVX == 0);
18141   match(Set dst (DivF dst src));
18142 
18143   format %{ "divss   $dst, $src" %}
18144   ins_cost(150);
18145   ins_encode %{
18146     __ divss($dst$$XMMRegister, $src$$XMMRegister);
18147   %}
18148   ins_pipe(pipe_slow);
18149 %}
18150 
18151 instruct divF_mem(regF dst, memory src) %{
18152   predicate(UseAVX == 0);
18153   match(Set dst (DivF dst (LoadF src)));
18154 
18155   format %{ "divss   $dst, $src" %}
18156   ins_cost(150);
18157   ins_encode %{
18158     __ divss($dst$$XMMRegister, $src$$Address);
18159   %}
18160   ins_pipe(pipe_slow);
18161 %}
18162 
18163 instruct divF_imm(regF dst, immF con) %{
18164   predicate(UseAVX == 0);
18165   match(Set dst (DivF dst con));
18166   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18167   ins_cost(150);
18168   ins_encode %{
18169     __ divss($dst$$XMMRegister, $constantaddress($con));
18170   %}
18171   ins_pipe(pipe_slow);
18172 %}
18173 
18174 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18175   predicate(UseAVX > 0);
18176   match(Set dst (DivF src1 src2));
18177 
18178   format %{ "vdivss  $dst, $src1, $src2" %}
18179   ins_cost(150);
18180   ins_encode %{
18181     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18182   %}
18183   ins_pipe(pipe_slow);
18184 %}
18185 
18186 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18187   predicate(UseAVX > 0);
18188   match(Set dst (DivF src1 (LoadF src2)));
18189 
18190   format %{ "vdivss  $dst, $src1, $src2" %}
18191   ins_cost(150);
18192   ins_encode %{
18193     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18194   %}
18195   ins_pipe(pipe_slow);
18196 %}
18197 
18198 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18199   predicate(UseAVX > 0);
18200   match(Set dst (DivF src con));
18201 
18202   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18203   ins_cost(150);
18204   ins_encode %{
18205     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18206   %}
18207   ins_pipe(pipe_slow);
18208 %}
18209 
18210 instruct divD_reg(regD dst, regD src) %{
18211   predicate(UseAVX == 0);
18212   match(Set dst (DivD dst src));
18213 
18214   format %{ "divsd   $dst, $src" %}
18215   ins_cost(150);
18216   ins_encode %{
18217     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18218   %}
18219   ins_pipe(pipe_slow);
18220 %}
18221 
18222 instruct divD_mem(regD dst, memory src) %{
18223   predicate(UseAVX == 0);
18224   match(Set dst (DivD dst (LoadD src)));
18225 
18226   format %{ "divsd   $dst, $src" %}
18227   ins_cost(150);
18228   ins_encode %{
18229     __ divsd($dst$$XMMRegister, $src$$Address);
18230   %}
18231   ins_pipe(pipe_slow);
18232 %}
18233 
18234 instruct divD_imm(regD dst, immD con) %{
18235   predicate(UseAVX == 0);
18236   match(Set dst (DivD dst con));
18237   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18238   ins_cost(150);
18239   ins_encode %{
18240     __ divsd($dst$$XMMRegister, $constantaddress($con));
18241   %}
18242   ins_pipe(pipe_slow);
18243 %}
18244 
18245 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18246   predicate(UseAVX > 0);
18247   match(Set dst (DivD src1 src2));
18248 
18249   format %{ "vdivsd  $dst, $src1, $src2" %}
18250   ins_cost(150);
18251   ins_encode %{
18252     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18253   %}
18254   ins_pipe(pipe_slow);
18255 %}
18256 
18257 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18258   predicate(UseAVX > 0);
18259   match(Set dst (DivD src1 (LoadD src2)));
18260 
18261   format %{ "vdivsd  $dst, $src1, $src2" %}
18262   ins_cost(150);
18263   ins_encode %{
18264     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18265   %}
18266   ins_pipe(pipe_slow);
18267 %}
18268 
18269 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18270   predicate(UseAVX > 0);
18271   match(Set dst (DivD src con));
18272 
18273   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18274   ins_cost(150);
18275   ins_encode %{
18276     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18277   %}
18278   ins_pipe(pipe_slow);
18279 %}
18280 
18281 instruct absF_reg(regF dst) %{
18282   predicate(UseAVX == 0);
18283   match(Set dst (AbsF dst));
18284   ins_cost(150);
18285   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18286   ins_encode %{
18287     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18288   %}
18289   ins_pipe(pipe_slow);
18290 %}
18291 
18292 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18293   predicate(UseAVX > 0);
18294   match(Set dst (AbsF src));
18295   ins_cost(150);
18296   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18297   ins_encode %{
18298     int vlen_enc = Assembler::AVX_128bit;
18299     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18300               ExternalAddress(float_signmask()), vlen_enc);
18301   %}
18302   ins_pipe(pipe_slow);
18303 %}
18304 
18305 instruct absD_reg(regD dst) %{
18306   predicate(UseAVX == 0);
18307   match(Set dst (AbsD dst));
18308   ins_cost(150);
18309   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18310             "# abs double by sign masking" %}
18311   ins_encode %{
18312     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18313   %}
18314   ins_pipe(pipe_slow);
18315 %}
18316 
18317 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18318   predicate(UseAVX > 0);
18319   match(Set dst (AbsD src));
18320   ins_cost(150);
18321   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18322             "# abs double by sign masking" %}
18323   ins_encode %{
18324     int vlen_enc = Assembler::AVX_128bit;
18325     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18326               ExternalAddress(double_signmask()), vlen_enc);
18327   %}
18328   ins_pipe(pipe_slow);
18329 %}
18330 
18331 instruct negF_reg(regF dst) %{
18332   predicate(UseAVX == 0);
18333   match(Set dst (NegF dst));
18334   ins_cost(150);
18335   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18336   ins_encode %{
18337     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18338   %}
18339   ins_pipe(pipe_slow);
18340 %}
18341 
18342 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18343   predicate(UseAVX > 0);
18344   match(Set dst (NegF src));
18345   ins_cost(150);
18346   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18347   ins_encode %{
18348     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18349                  ExternalAddress(float_signflip()));
18350   %}
18351   ins_pipe(pipe_slow);
18352 %}
18353 
18354 instruct negD_reg(regD dst) %{
18355   predicate(UseAVX == 0);
18356   match(Set dst (NegD dst));
18357   ins_cost(150);
18358   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18359             "# neg double by sign flipping" %}
18360   ins_encode %{
18361     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18362   %}
18363   ins_pipe(pipe_slow);
18364 %}
18365 
18366 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18367   predicate(UseAVX > 0);
18368   match(Set dst (NegD src));
18369   ins_cost(150);
18370   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18371             "# neg double by sign flipping" %}
18372   ins_encode %{
18373     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18374                  ExternalAddress(double_signflip()));
18375   %}
18376   ins_pipe(pipe_slow);
18377 %}
18378 
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18381 instruct sqrtF_reg(regF dst) %{
18382   match(Set dst (SqrtF dst));
18383   format %{ "sqrtss  $dst, $dst" %}
18384   ins_encode %{
18385     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18386   %}
18387   ins_pipe(pipe_slow);
18388 %}
18389 
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18392 instruct sqrtD_reg(regD dst) %{
18393   match(Set dst (SqrtD dst));
18394   format %{ "sqrtsd  $dst, $dst" %}
18395   ins_encode %{
18396     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18397   %}
18398   ins_pipe(pipe_slow);
18399 %}
18400 
18401 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18402   effect(TEMP tmp);
18403   match(Set dst (ConvF2HF src));
18404   ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
18406   ins_encode %{
18407     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18408   %}
18409   ins_pipe( pipe_slow );
18410 %}
18411 
18412 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18413   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18414   effect(TEMP ktmp, TEMP rtmp);
18415   match(Set mem (StoreC mem (ConvF2HF src)));
18416   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18417   ins_encode %{
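    // Build an opmask with only bit 0 set so the masked evcvtps2ph store writes
    // just the single 16-bit result to memory.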
18418     __ movl($rtmp$$Register, 0x1);
18419     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18420     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18421   %}
18422   ins_pipe( pipe_slow );
18423 %}
18424 
18425 instruct vconvF2HF(vec dst, vec src) %{
18426   match(Set dst (VectorCastF2HF src));
18427   format %{ "vector_conv_F2HF $dst $src" %}
18428   ins_encode %{
18429     int vlen_enc = vector_length_encoding(this, $src);
18430     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18431   %}
18432   ins_pipe( pipe_slow );
18433 %}
18434 
18435 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18436   predicate(n->as_StoreVector()->memory_size() >= 16);
18437   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18438   format %{ "vcvtps2ph $mem,$src" %}
18439   ins_encode %{
18440     int vlen_enc = vector_length_encoding(this, $src);
18441     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18442   %}
18443   ins_pipe( pipe_slow );
18444 %}
18445 
18446 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18447   match(Set dst (ConvHF2F src));
18448   format %{ "vcvtph2ps $dst,$src" %}
18449   ins_encode %{
18450     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18451   %}
18452   ins_pipe( pipe_slow );
18453 %}
18454 
18455 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18456   match(Set dst (VectorCastHF2F (LoadVector mem)));
18457   format %{ "vcvtph2ps $dst,$mem" %}
18458   ins_encode %{
18459     int vlen_enc = vector_length_encoding(this);
18460     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18461   %}
18462   ins_pipe( pipe_slow );
18463 %}
18464 
18465 instruct vconvHF2F(vec dst, vec src) %{
18466   match(Set dst (VectorCastHF2F src));
18467   ins_cost(125);
18468   format %{ "vector_conv_HF2F $dst,$src" %}
18469   ins_encode %{
18470     int vlen_enc = vector_length_encoding(this);
18471     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18472   %}
18473   ins_pipe( pipe_slow );
18474 %}
18475 
18476 // ---------------------------------------- VectorReinterpret ------------------------------------
18477 instruct reinterpret_mask(kReg dst) %{
18478   predicate(n->bottom_type()->isa_vectmask() &&
18479             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18480   match(Set dst (VectorReinterpret dst));
18481   ins_cost(125);
18482   format %{ "vector_reinterpret $dst\t!" %}
18483   ins_encode %{
18484     // empty
18485   %}
18486   ins_pipe( pipe_slow );
18487 %}
18488 
18489 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18490   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18491             n->bottom_type()->isa_vectmask() &&
18492             n->in(1)->bottom_type()->isa_vectmask() &&
18493             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18495   match(Set dst (VectorReinterpret src));
18496   effect(TEMP xtmp);
18497   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18498   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18505   %}
18506   ins_pipe( pipe_slow );
18507 %}
18508 
18509 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18510   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18511             n->bottom_type()->isa_vectmask() &&
18512             n->in(1)->bottom_type()->isa_vectmask() &&
18513             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18514              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18516   match(Set dst (VectorReinterpret src));
18517   effect(TEMP xtmp);
18518   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18519   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18526   %}
18527   ins_pipe( pipe_slow );
18528 %}
18529 
18530 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18531   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18532             n->bottom_type()->isa_vectmask() &&
18533             n->in(1)->bottom_type()->isa_vectmask() &&
18534             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18535              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18537   match(Set dst (VectorReinterpret src));
18538   effect(TEMP xtmp);
18539   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18540   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18547   %}
18548   ins_pipe( pipe_slow );
18549 %}
18550 
18551 instruct reinterpret(vec dst) %{
18552   predicate(!n->bottom_type()->isa_vectmask() &&
18553             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18554   match(Set dst (VectorReinterpret dst));
18555   ins_cost(125);
18556   format %{ "vector_reinterpret $dst\t!" %}
18557   ins_encode %{
18558     // empty
18559   %}
18560   ins_pipe( pipe_slow );
18561 %}
18562 
18563 instruct reinterpret_expand(vec dst, vec src) %{
18564   predicate(UseAVX == 0 &&
18565             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18566   match(Set dst (VectorReinterpret src));
18567   ins_cost(125);
18568   effect(TEMP dst);
18569   format %{ "vector_reinterpret_expand $dst,$src" %}
18570   ins_encode %{
18571     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18572     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18573 
18574     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
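    // AND the source with a mask covering only its 4 or 8 low bytes so the
    // widened upper bytes of dst are zeroed.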
18575     if (src_vlen_in_bytes == 4) {
18576       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18577     } else {
18578       assert(src_vlen_in_bytes == 8, "");
18579       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18580     }
18581     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18582   %}
18583   ins_pipe( pipe_slow );
18584 %}
18585 
18586 instruct vreinterpret_expand4(legVec dst, vec src) %{
18587   predicate(UseAVX > 0 &&
18588             !n->bottom_type()->isa_vectmask() &&
18589             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18590             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18591   match(Set dst (VectorReinterpret src));
18592   ins_cost(125);
18593   format %{ "vector_reinterpret_expand $dst,$src" %}
18594   ins_encode %{
18595     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18596   %}
18597   ins_pipe( pipe_slow );
18598 %}
18599 
18600 
18601 instruct vreinterpret_expand(legVec dst, vec src) %{
18602   predicate(UseAVX > 0 &&
18603             !n->bottom_type()->isa_vectmask() &&
18604             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18605             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18606   match(Set dst (VectorReinterpret src));
18607   ins_cost(125);
18608   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18609   ins_encode %{
18610     switch (Matcher::vector_length_in_bytes(this, $src)) {
18611       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18612       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18613       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18614       default: ShouldNotReachHere();
18615     }
18616   %}
18617   ins_pipe( pipe_slow );
18618 %}
18619 
18620 instruct reinterpret_shrink(vec dst, legVec src) %{
18621   predicate(!n->bottom_type()->isa_vectmask() &&
18622             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18623   match(Set dst (VectorReinterpret src));
18624   ins_cost(125);
18625   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18626   ins_encode %{
18627     switch (Matcher::vector_length_in_bytes(this)) {
18628       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18629       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18630       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18631       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18632       default: ShouldNotReachHere();
18633     }
18634   %}
18635   ins_pipe( pipe_slow );
18636 %}
18637 
18638 // ----------------------------------------------------------------------------------------------------
18639 
18640 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18641   match(Set dst (RoundDoubleMode src rmode));
18642   format %{ "roundsd $dst,$src" %}
18643   ins_cost(150);
18644   ins_encode %{
18645     assert(UseSSE >= 4, "required");
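    // Without AVX, roundsd writes only the low 64 bits of dst; clearing dst first
    // breaks the false dependence on its previous contents.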
18646     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18647       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18648     }
18649     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18650   %}
18651   ins_pipe(pipe_slow);
18652 %}
18653 
18654 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18655   match(Set dst (RoundDoubleMode con rmode));
18656   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18657   ins_cost(150);
18658   ins_encode %{
18659     assert(UseSSE >= 4, "required");
18660     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18661   %}
18662   ins_pipe(pipe_slow);
18663 %}
18664 
18665 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18666   predicate(Matcher::vector_length(n) < 8);
18667   match(Set dst (RoundDoubleModeV src rmode));
18668   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18669   ins_encode %{
18670     assert(UseAVX > 0, "required");
18671     int vlen_enc = vector_length_encoding(this);
18672     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18673   %}
18674   ins_pipe( pipe_slow );
18675 %}
18676 
18677 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18678   predicate(Matcher::vector_length(n) == 8);
18679   match(Set dst (RoundDoubleModeV src rmode));
18680   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18681   ins_encode %{
18682     assert(UseAVX > 2, "required");
18683     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18684   %}
18685   ins_pipe( pipe_slow );
18686 %}
18687 
18688 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18689   predicate(Matcher::vector_length(n) < 8);
18690   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18691   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18692   ins_encode %{
18693     assert(UseAVX > 0, "required");
18694     int vlen_enc = vector_length_encoding(this);
18695     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18696   %}
18697   ins_pipe( pipe_slow );
18698 %}
18699 
18700 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18701   predicate(Matcher::vector_length(n) == 8);
18702   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18703   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18704   ins_encode %{
18705     assert(UseAVX > 2, "required");
18706     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18707   %}
18708   ins_pipe( pipe_slow );
18709 %}
18710 
18711 instruct onspinwait() %{
18712   match(OnSpinWait);
18713   ins_cost(200);
18714 
18715   format %{
18716     $$template
18717     $$emit$$"pause\t! membar_onspinwait"
18718   %}
18719   ins_encode %{
18720     __ pause();
18721   %}
18722   ins_pipe(pipe_slow);
18723 %}
18724 
18725 // a * b + c
18726 instruct fmaD_reg(regD a, regD b, regD c) %{
18727   match(Set c (FmaD  c (Binary a b)));
18728   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18729   ins_cost(150);
18730   ins_encode %{
18731     assert(UseFMA, "Needs FMA instructions support.");
18732     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18733   %}
18734   ins_pipe( pipe_slow );
18735 %}
18736 
18737 // a * b + c
18738 instruct fmaF_reg(regF a, regF b, regF c) %{
18739   match(Set c (FmaF  c (Binary a b)));
18740   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18741   ins_cost(150);
18742   ins_encode %{
18743     assert(UseFMA, "Needs FMA instructions support.");
18744     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18745   %}
18746   ins_pipe( pipe_slow );
18747 %}
18748 
18749 // ====================VECTOR INSTRUCTIONS=====================================
18750 
18751 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18752 instruct MoveVec2Leg(legVec dst, vec src) %{
18753   match(Set dst src);
18754   format %{ "" %}
18755   ins_encode %{
18756     ShouldNotReachHere();
18757   %}
18758   ins_pipe( fpu_reg_reg );
18759 %}
18760 
18761 instruct MoveLeg2Vec(vec dst, legVec src) %{
18762   match(Set dst src);
18763   format %{ "" %}
18764   ins_encode %{
18765     ShouldNotReachHere();
18766   %}
18767   ins_pipe( fpu_reg_reg );
18768 %}
18769 
18770 // ============================================================================
18771 
18772 // Load vectors generic operand pattern
18773 instruct loadV(vec dst, memory mem) %{
18774   match(Set dst (LoadVector mem));
18775   ins_cost(125);
18776   format %{ "load_vector $dst,$mem" %}
18777   ins_encode %{
18778     BasicType bt = Matcher::vector_element_basic_type(this);
18779     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18780   %}
18781   ins_pipe( pipe_slow );
18782 %}
18783 
18784 // Store vectors generic operand pattern.
18785 instruct storeV(memory mem, vec src) %{
18786   match(Set mem (StoreVector mem src));
18787   ins_cost(145);
18788   format %{ "store_vector $mem,$src\n\t" %}
18789   ins_encode %{
18790     switch (Matcher::vector_length_in_bytes(this, $src)) {
18791       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18792       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18793       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18794       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18795       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18796       default: ShouldNotReachHere();
18797     }
18798   %}
18799   ins_pipe( pipe_slow );
18800 %}
18801 
18802 // ---------------------------------------- Gather ------------------------------------
18803 
18804 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18805 
18806 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18807   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18808             Matcher::vector_length_in_bytes(n) <= 32);
18809   match(Set dst (LoadVectorGather mem idx));
18810   effect(TEMP dst, TEMP tmp, TEMP mask);
18811   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18812   ins_encode %{
18813     int vlen_enc = vector_length_encoding(this);
18814     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18815     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
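    // vpcmpeqd of a register with itself yields all-ones, i.e. a mask selecting every lane of the gather.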
18816     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18817     __ lea($tmp$$Register, $mem$$Address);
18818     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18819   %}
18820   ins_pipe( pipe_slow );
18821 %}
18822 
18823 
18824 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18825   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18826             !is_subword_type(Matcher::vector_element_basic_type(n)));
18827   match(Set dst (LoadVectorGather mem idx));
18828   effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18830   ins_encode %{
18831     int vlen_enc = vector_length_encoding(this);
18832     BasicType elem_bt = Matcher::vector_element_basic_type(this);
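    // kxnor of a register with itself produces an all-ones opmask, so every lane is gathered.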
18833     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18834     __ lea($tmp$$Register, $mem$$Address);
18835     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18836   %}
18837   ins_pipe( pipe_slow );
18838 %}
18839 
18840 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18841   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18842             !is_subword_type(Matcher::vector_element_basic_type(n)));
18843   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18844   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18846   ins_encode %{
18847     assert(UseAVX > 2, "sanity");
18848     int vlen_enc = vector_length_encoding(this);
18849     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18850     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register used
    // for predication, the mask operand is copied to a temporary.
18853     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
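    // Clear dst so that lanes disabled by the mask end up zero rather than holding stale data.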
18854     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18855     __ lea($tmp$$Register, $mem$$Address);
18856     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18857   %}
18858   ins_pipe( pipe_slow );
18859 %}
18860 
18861 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18862   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18863   match(Set dst (LoadVectorGather mem idx_base));
18864   effect(TEMP tmp, TEMP rtmp);
18865   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18866   ins_encode %{
18867     int vlen_enc = vector_length_encoding(this);
18868     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18869     __ lea($tmp$$Register, $mem$$Address);
18870     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18871   %}
18872   ins_pipe( pipe_slow );
18873 %}
18874 
18875 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18876                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18877   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18878   match(Set dst (LoadVectorGather mem idx_base));
18879   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18880   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18881   ins_encode %{
18882     int vlen_enc = vector_length_encoding(this);
18883     int vector_len = Matcher::vector_length(this);
18884     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18885     __ lea($tmp$$Register, $mem$$Address);
18886     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18887     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18888                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18889   %}
18890   ins_pipe( pipe_slow );
18891 %}
18892 
18893 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18894   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18895   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18896   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18897   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18898   ins_encode %{
18899     int vlen_enc = vector_length_encoding(this);
18900     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18901     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18902     __ lea($tmp$$Register, $mem$$Address);
18903     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18904     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18905   %}
18906   ins_pipe( pipe_slow );
18907 %}
18908 
18909 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18910                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18911   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18912   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18913   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18914   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18915   ins_encode %{
18916     int vlen_enc = vector_length_encoding(this);
18917     int vector_len = Matcher::vector_length(this);
18918     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18919     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18920     __ lea($tmp$$Register, $mem$$Address);
18921     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18922     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18923     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18924                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18925   %}
18926   ins_pipe( pipe_slow );
18927 %}
18928 
18929 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18930   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18931   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18932   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18933   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18934   ins_encode %{
18935     int vlen_enc = vector_length_encoding(this);
18936     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18937     __ lea($tmp$$Register, $mem$$Address);
18938     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
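    // vpmovmskb produces one mask bit per byte; for shorts compress it with pext
    // (mask 0x55555555) so there is one bit per 16-bit element.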
18939     if (elem_bt == T_SHORT) {
18940       __ movl($mask_idx$$Register, 0x55555555);
18941       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18942     }
18943     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18944     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18945   %}
18946   ins_pipe( pipe_slow );
18947 %}
18948 
18949 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18950                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18951   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18952   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18953   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18954   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18955   ins_encode %{
18956     int vlen_enc = vector_length_encoding(this);
18957     int vector_len = Matcher::vector_length(this);
18958     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18959     __ lea($tmp$$Register, $mem$$Address);
18960     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18961     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
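    // vpmovmskb yields one mask bit per byte; compress with pext (0x55555555) to one bit per short element.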
18962     if (elem_bt == T_SHORT) {
18963       __ movl($mask_idx$$Register, 0x55555555);
18964       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18965     }
18966     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18967     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18968                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18969   %}
18970   ins_pipe( pipe_slow );
18971 %}
18972 
18973 // ====================Scatter=======================================
18974 
18975 // Scatter INT, LONG, FLOAT, DOUBLE
18976 
18977 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18978   predicate(UseAVX > 2);
18979   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18980   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18982   ins_encode %{
18983     int vlen_enc = vector_length_encoding(this, $src);
18984     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18985 
18986     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18987     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18988 
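    // Load an all-ones opmask from the constant table so every element is scattered.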
18989     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18990     __ lea($tmp$$Register, $mem$$Address);
18991     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18992   %}
18993   ins_pipe( pipe_slow );
18994 %}
18995 
18996 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18997   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18998   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
19000   ins_encode %{
19001     int vlen_enc = vector_length_encoding(this, $src);
19002     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19003     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19004     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is copied to a temporary.
19007     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
19008     __ lea($tmp$$Register, $mem$$Address);
19009     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19010   %}
19011   ins_pipe( pipe_slow );
19012 %}
19013 
19014 // ====================REPLICATE=======================================
19015 
19016 // Replicate byte scalar to be vector
19017 instruct vReplB_reg(vec dst, rRegI src) %{
19018   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
19019   match(Set dst (Replicate src));
19020   format %{ "replicateB $dst,$src" %}
19021   ins_encode %{
19022     uint vlen = Matcher::vector_length(this);
19023     if (UseAVX >= 2) {
19024       int vlen_enc = vector_length_encoding(this);
19025       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19026         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
19027         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
19028       } else {
19029         __ movdl($dst$$XMMRegister, $src$$Register);
19030         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19031       }
19032     } else {
      assert(UseAVX < 2, "");
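      // Pre-AVX2 path: duplicate the byte into a word (punpcklbw), splat that word
      // across the low quadword (pshuflw), then copy it to the upper quadword for
      // 16-byte vectors.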
19034       __ movdl($dst$$XMMRegister, $src$$Register);
19035       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
19036       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19037       if (vlen >= 16) {
19038         assert(vlen == 16, "");
19039         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19040       }
19041     }
19042   %}
19043   ins_pipe( pipe_slow );
19044 %}
19045 
19046 instruct ReplB_mem(vec dst, memory mem) %{
19047   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
19048   match(Set dst (Replicate (LoadB mem)));
19049   format %{ "replicateB $dst,$mem" %}
19050   ins_encode %{
19051     int vlen_enc = vector_length_encoding(this);
19052     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
19053   %}
19054   ins_pipe( pipe_slow );
19055 %}
19056 
19057 // ====================ReplicateS=======================================
19058 
19059 instruct vReplS_reg(vec dst, rRegI src) %{
19060   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
19061   match(Set dst (Replicate src));
19062   format %{ "replicateS $dst,$src" %}
19063   ins_encode %{
19064     uint vlen = Matcher::vector_length(this);
19065     int vlen_enc = vector_length_encoding(this);
19066     if (UseAVX >= 2) {
19067       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19068         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
19069         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
19070       } else {
19071         __ movdl($dst$$XMMRegister, $src$$Register);
19072         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19073       }
19074     } else {
19075       assert(UseAVX < 2, "");
19076       __ movdl($dst$$XMMRegister, $src$$Register);
19077       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19078       if (vlen >= 8) {
19079         assert(vlen == 8, "");
19080         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19081       }
19082     }
19083   %}
19084   ins_pipe( pipe_slow );
19085 %}
19086 
19087 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
19088   match(Set dst (Replicate con));
19089   effect(TEMP rtmp);
19090   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
19091   ins_encode %{
19092     int vlen_enc = vector_length_encoding(this);
19093     BasicType bt = Matcher::vector_element_basic_type(this);
19094     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
19095     __ movl($rtmp$$Register, $con$$constant);
19096     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19097   %}
19098   ins_pipe( pipe_slow );
19099 %}
19100 
19101 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
19102   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
19103   match(Set dst (Replicate src));
19104   effect(TEMP rtmp);
19105   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
19106   ins_encode %{
19107     int vlen_enc = vector_length_encoding(this);
19108     __ vmovw($rtmp$$Register, $src$$XMMRegister);
19109     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19110   %}
19111   ins_pipe( pipe_slow );
19112 %}
19113 
19114 instruct ReplS_mem(vec dst, memory mem) %{
19115   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19116   match(Set dst (Replicate (LoadS mem)));
19117   format %{ "replicateS $dst,$mem" %}
19118   ins_encode %{
19119     int vlen_enc = vector_length_encoding(this);
19120     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19121   %}
19122   ins_pipe( pipe_slow );
19123 %}
19124 
19125 // ====================ReplicateI=======================================
19126 
19127 instruct ReplI_reg(vec dst, rRegI src) %{
19128   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19129   match(Set dst (Replicate src));
19130   format %{ "replicateI $dst,$src" %}
19131   ins_encode %{
19132     uint vlen = Matcher::vector_length(this);
19133     int vlen_enc = vector_length_encoding(this);
19134     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19135       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19136     } else if (VM_Version::supports_avx2()) {
19137       __ movdl($dst$$XMMRegister, $src$$Register);
19138       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19139     } else {
19140       __ movdl($dst$$XMMRegister, $src$$Register);
19141       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19142     }
19143   %}
19144   ins_pipe( pipe_slow );
19145 %}
19146 
19147 instruct ReplI_mem(vec dst, memory mem) %{
19148   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19149   match(Set dst (Replicate (LoadI mem)));
19150   format %{ "replicateI $dst,$mem" %}
19151   ins_encode %{
19152     int vlen_enc = vector_length_encoding(this);
19153     if (VM_Version::supports_avx2()) {
19154       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19155     } else if (VM_Version::supports_avx()) {
19156       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19157     } else {
19158       __ movdl($dst$$XMMRegister, $mem$$Address);
19159       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19160     }
19161   %}
19162   ins_pipe( pipe_slow );
19163 %}
19164 
19165 instruct ReplI_imm(vec dst, immI con) %{
19166   predicate(Matcher::is_non_long_integral_vector(n));
19167   match(Set dst (Replicate con));
19168   format %{ "replicateI $dst,$con" %}
19169   ins_encode %{
19170     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19171                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19172                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
19173     BasicType bt = Matcher::vector_element_basic_type(this);
19174     int vlen = Matcher::vector_length_in_bytes(this);
19175     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19176   %}
19177   ins_pipe( pipe_slow );
19178 %}
19179 
19180 // Replicate scalar zero to be vector
19181 instruct ReplI_zero(vec dst, immI_0 zero) %{
19182   predicate(Matcher::is_non_long_integral_vector(n));
19183   match(Set dst (Replicate zero));
19184   format %{ "replicateI $dst,$zero" %}
19185   ins_encode %{
19186     int vlen_enc = vector_length_encoding(this);
19187     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19188       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19189     } else {
19190       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19191     }
19192   %}
19193   ins_pipe( fpu_reg_reg );
19194 %}
19195 
19196 instruct ReplI_M1(vec dst, immI_M1 con) %{
19197   predicate(Matcher::is_non_long_integral_vector(n));
19198   match(Set dst (Replicate con));
19199   format %{ "vallones $dst" %}
19200   ins_encode %{
19201     int vector_len = vector_length_encoding(this);
19202     __ vallones($dst$$XMMRegister, vector_len);
19203   %}
19204   ins_pipe( pipe_slow );
19205 %}
19206 
19207 // ====================ReplicateL=======================================
19208 
19209 // Replicate long (8 byte) scalar to be vector
19210 instruct ReplL_reg(vec dst, rRegL src) %{
19211   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19212   match(Set dst (Replicate src));
19213   format %{ "replicateL $dst,$src" %}
19214   ins_encode %{
19215     int vlen = Matcher::vector_length(this);
19216     int vlen_enc = vector_length_encoding(this);
19217     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19218       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19219     } else if (VM_Version::supports_avx2()) {
19220       __ movdq($dst$$XMMRegister, $src$$Register);
19221       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19222     } else {
19223       __ movdq($dst$$XMMRegister, $src$$Register);
19224       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19225     }
19226   %}
19227   ins_pipe( pipe_slow );
19228 %}
19229 
19230 instruct ReplL_mem(vec dst, memory mem) %{
19231   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19232   match(Set dst (Replicate (LoadL mem)));
19233   format %{ "replicateL $dst,$mem" %}
19234   ins_encode %{
19235     int vlen_enc = vector_length_encoding(this);
19236     if (VM_Version::supports_avx2()) {
19237       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19238     } else if (VM_Version::supports_sse3()) {
19239       __ movddup($dst$$XMMRegister, $mem$$Address);
19240     } else {
19241       __ movq($dst$$XMMRegister, $mem$$Address);
19242       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19243     }
19244   %}
19245   ins_pipe( pipe_slow );
19246 %}
19247 
19248 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19249 instruct ReplL_imm(vec dst, immL con) %{
19250   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19251   match(Set dst (Replicate con));
19252   format %{ "replicateL $dst,$con" %}
19253   ins_encode %{
19254     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19255     int vlen = Matcher::vector_length_in_bytes(this);
19256     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19257   %}
19258   ins_pipe( pipe_slow );
19259 %}
19260 
19261 instruct ReplL_zero(vec dst, immL0 zero) %{
19262   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19263   match(Set dst (Replicate zero));
19264   format %{ "replicateL $dst,$zero" %}
19265   ins_encode %{
19266     int vlen_enc = vector_length_encoding(this);
19267     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19268       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19269     } else {
19270       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19271     }
19272   %}
19273   ins_pipe( fpu_reg_reg );
19274 %}
19275 
19276 instruct ReplL_M1(vec dst, immL_M1 con) %{
19277   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19278   match(Set dst (Replicate con));
19279   format %{ "vallones $dst" %}
19280   ins_encode %{
19281     int vector_len = vector_length_encoding(this);
19282     __ vallones($dst$$XMMRegister, vector_len);
19283   %}
19284   ins_pipe( pipe_slow );
19285 %}
19286 
19287 // ====================ReplicateF=======================================
19288 
19289 instruct vReplF_reg(vec dst, vlRegF src) %{
19290   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19291   match(Set dst (Replicate src));
19292   format %{ "replicateF $dst,$src" %}
19293   ins_encode %{
19294     uint vlen = Matcher::vector_length(this);
19295     int vlen_enc = vector_length_encoding(this);
19296     if (vlen <= 4) {
19297       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19298     } else if (VM_Version::supports_avx2()) {
19299       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19300     } else {
19301       assert(vlen == 8, "sanity");
19302       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19303       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19304     }
19305   %}
19306   ins_pipe( pipe_slow );
19307 %}
19308 
19309 instruct ReplF_reg(vec dst, vlRegF src) %{
19310   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19311   match(Set dst (Replicate src));
19312   format %{ "replicateF $dst,$src" %}
19313   ins_encode %{
19314     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19315   %}
19316   ins_pipe( pipe_slow );
19317 %}
19318 
19319 instruct ReplF_mem(vec dst, memory mem) %{
19320   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19321   match(Set dst (Replicate (LoadF mem)));
19322   format %{ "replicateF $dst,$mem" %}
19323   ins_encode %{
19324     int vlen_enc = vector_length_encoding(this);
19325     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19326   %}
19327   ins_pipe( pipe_slow );
19328 %}
19329 
19330 // Replicate float scalar immediate to be vector by loading from const table.
19331 instruct ReplF_imm(vec dst, immF con) %{
19332   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19333   match(Set dst (Replicate con));
19334   format %{ "replicateF $dst,$con" %}
19335   ins_encode %{
19336     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19337                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19338     int vlen = Matcher::vector_length_in_bytes(this);
19339     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19340   %}
19341   ins_pipe( pipe_slow );
19342 %}
19343 
19344 instruct ReplF_zero(vec dst, immF0 zero) %{
19345   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19346   match(Set dst (Replicate zero));
19347   format %{ "replicateF $dst,$zero" %}
19348   ins_encode %{
19349     int vlen_enc = vector_length_encoding(this);
19350     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19351       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19352     } else {
19353       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19354     }
19355   %}
19356   ins_pipe( fpu_reg_reg );
19357 %}
19358 
19359 // ====================ReplicateD=======================================
19360 
19361 // Replicate double (8 bytes) scalar to be vector
19362 instruct vReplD_reg(vec dst, vlRegD src) %{
19363   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19364   match(Set dst (Replicate src));
19365   format %{ "replicateD $dst,$src" %}
19366   ins_encode %{
19367     uint vlen = Matcher::vector_length(this);
19368     int vlen_enc = vector_length_encoding(this);
19369     if (vlen <= 2) {
19370       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19371     } else if (VM_Version::supports_avx2()) {
19372       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19373     } else {
19374       assert(vlen == 4, "sanity");
19375       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19376       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19377     }
19378   %}
19379   ins_pipe( pipe_slow );
19380 %}
19381 
19382 instruct ReplD_reg(vec dst, vlRegD src) %{
19383   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19384   match(Set dst (Replicate src));
19385   format %{ "replicateD $dst,$src" %}
19386   ins_encode %{
19387     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19388   %}
19389   ins_pipe( pipe_slow );
19390 %}
19391 
19392 instruct ReplD_mem(vec dst, memory mem) %{
19393   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19394   match(Set dst (Replicate (LoadD mem)));
19395   format %{ "replicateD $dst,$mem" %}
19396   ins_encode %{
19397     if (Matcher::vector_length(this) >= 4) {
19398       int vlen_enc = vector_length_encoding(this);
19399       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19400     } else {
19401       __ movddup($dst$$XMMRegister, $mem$$Address);
19402     }
19403   %}
19404   ins_pipe( pipe_slow );
19405 %}
19406 
19407 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19408 instruct ReplD_imm(vec dst, immD con) %{
19409   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19410   match(Set dst (Replicate con));
19411   format %{ "replicateD $dst,$con" %}
19412   ins_encode %{
19413     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19414     int vlen = Matcher::vector_length_in_bytes(this);
19415     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19416   %}
19417   ins_pipe( pipe_slow );
19418 %}
19419 
19420 instruct ReplD_zero(vec dst, immD0 zero) %{
19421   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19422   match(Set dst (Replicate zero));
19423   format %{ "replicateD $dst,$zero" %}
19424   ins_encode %{
19425     int vlen_enc = vector_length_encoding(this);
19426     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19427       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19428     } else {
19429       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19430     }
19431   %}
19432   ins_pipe( fpu_reg_reg );
19433 %}
19434 
19435 // ====================VECTOR INSERT=======================================
19436 
19437 instruct insert(vec dst, rRegI val, immU8 idx) %{
19438   predicate(Matcher::vector_length_in_bytes(n) < 32);
19439   match(Set dst (VectorInsert (Binary dst val) idx));
19440   format %{ "vector_insert $dst,$val,$idx" %}
19441   ins_encode %{
19442     assert(UseSSE >= 4, "required");
19443     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19444 
19445     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19446 
19447     assert(is_integral_type(elem_bt), "");
19448     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19449 
19450     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19451   %}
19452   ins_pipe( pipe_slow );
19453 %}
19454 
19455 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19456   predicate(Matcher::vector_length_in_bytes(n) == 32);
19457   match(Set dst (VectorInsert (Binary src val) idx));
19458   effect(TEMP vtmp);
19459   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19460   ins_encode %{
19461     int vlen_enc = Assembler::AVX_256bit;
19462     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19463     int elem_per_lane = 16/type2aelembytes(elem_bt);
19464     int log2epr = log2(elem_per_lane);
19465 
19466     assert(is_integral_type(elem_bt), "sanity");
19467     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19468 
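    // y_idx selects the 128-bit lane of the 256-bit vector, x_idx the element within that lane.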
19469     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19470     uint y_idx = ($idx$$constant >> log2epr) & 1;
19471     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19472     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19473     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19474   %}
19475   ins_pipe( pipe_slow );
19476 %}
19477 
19478 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19479   predicate(Matcher::vector_length_in_bytes(n) == 64);
19480   match(Set dst (VectorInsert (Binary src val) idx));
19481   effect(TEMP vtmp);
19482   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19483   ins_encode %{
19484     assert(UseAVX > 2, "sanity");
19485 
19486     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19487     int elem_per_lane = 16/type2aelembytes(elem_bt);
19488     int log2epr = log2(elem_per_lane);
19489 
19490     assert(is_integral_type(elem_bt), "");
19491     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19492 
19493     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19494     uint y_idx = ($idx$$constant >> log2epr) & 3;
19495     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19496     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19497     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19498   %}
19499   ins_pipe( pipe_slow );
19500 %}
19501 
19502 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19503   predicate(Matcher::vector_length(n) == 2);
19504   match(Set dst (VectorInsert (Binary dst val) idx));
19505   format %{ "vector_insert $dst,$val,$idx" %}
19506   ins_encode %{
19507     assert(UseSSE >= 4, "required");
19508     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19509     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19510 
19511     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19512   %}
19513   ins_pipe( pipe_slow );
19514 %}
19515 
19516 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19517   predicate(Matcher::vector_length(n) == 4);
19518   match(Set dst (VectorInsert (Binary src val) idx));
19519   effect(TEMP vtmp);
19520   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19521   ins_encode %{
19522     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19523     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19524 
19525     uint x_idx = $idx$$constant & right_n_bits(1);
19526     uint y_idx = ($idx$$constant >> 1) & 1;
19527     int vlen_enc = Assembler::AVX_256bit;
19528     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19529     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19530     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19531   %}
19532   ins_pipe( pipe_slow );
19533 %}
19534 
19535 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19536   predicate(Matcher::vector_length(n) == 8);
19537   match(Set dst (VectorInsert (Binary src val) idx));
19538   effect(TEMP vtmp);
19539   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19540   ins_encode %{
19541     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19542     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19543 
19544     uint x_idx = $idx$$constant & right_n_bits(1);
19545     uint y_idx = ($idx$$constant >> 1) & 3;
19546     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19547     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19548     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19549   %}
19550   ins_pipe( pipe_slow );
19551 %}
19552 
19553 instruct insertF(vec dst, regF val, immU8 idx) %{
19554   predicate(Matcher::vector_length(n) < 8);
19555   match(Set dst (VectorInsert (Binary dst val) idx));
19556   format %{ "vector_insert $dst,$val,$idx" %}
19557   ins_encode %{
19558     assert(UseSSE >= 4, "sanity");
19559 
19560     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19561     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19562 
19563     uint x_idx = $idx$$constant & right_n_bits(2);
19564     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19565   %}
19566   ins_pipe( pipe_slow );
19567 %}
19568 
19569 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19570   predicate(Matcher::vector_length(n) >= 8);
19571   match(Set dst (VectorInsert (Binary src val) idx));
19572   effect(TEMP vtmp);
19573   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19574   ins_encode %{
19575     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19576     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19577 
19578     int vlen = Matcher::vector_length(this);
19579     uint x_idx = $idx$$constant & right_n_bits(2);
19580     if (vlen == 8) {
19581       uint y_idx = ($idx$$constant >> 2) & 1;
19582       int vlen_enc = Assembler::AVX_256bit;
19583       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19584       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19585       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19586     } else {
19587       assert(vlen == 16, "sanity");
19588       uint y_idx = ($idx$$constant >> 2) & 3;
19589       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19590       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19591       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19592     }
19593   %}
19594   ins_pipe( pipe_slow );
19595 %}
19596 
19597 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19598   predicate(Matcher::vector_length(n) == 2);
19599   match(Set dst (VectorInsert (Binary dst val) idx));
19600   effect(TEMP tmp);
19601   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19602   ins_encode %{
19603     assert(UseSSE >= 4, "sanity");
19604     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19605     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19606 
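    // There is no direct xmm-to-xmm insert for doubles, so the value goes through a
    // GPR and is placed with pinsrq.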
19607     __ movq($tmp$$Register, $val$$XMMRegister);
19608     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19609   %}
19610   ins_pipe( pipe_slow );
19611 %}
19612 
19613 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19614   predicate(Matcher::vector_length(n) == 4);
19615   match(Set dst (VectorInsert (Binary src val) idx));
19616   effect(TEMP vtmp, TEMP tmp);
19617   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19618   ins_encode %{
19619     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19620     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19621 
19622     uint x_idx = $idx$$constant & right_n_bits(1);
19623     uint y_idx = ($idx$$constant >> 1) & 1;
19624     int vlen_enc = Assembler::AVX_256bit;
19625     __ movq($tmp$$Register, $val$$XMMRegister);
19626     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19627     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19628     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19629   %}
19630   ins_pipe( pipe_slow );
19631 %}
19632 
19633 instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19634   predicate(Matcher::vector_length(n) == 8);
19635   match(Set dst (VectorInsert (Binary src val) idx));
19636   effect(TEMP tmp, TEMP vtmp);
19637   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19638   ins_encode %{
19639     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19640     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19641 
19642     uint x_idx = $idx$$constant & right_n_bits(1);
19643     uint y_idx = ($idx$$constant >> 1) & 3;
19644     __ movq($tmp$$Register, $val$$XMMRegister);
19645     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19646     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19647     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19648   %}
19649   ins_pipe( pipe_slow );
19650 %}
19651 
19652 // ====================REDUCTION ARITHMETIC=======================================
19653 
19654 // =======================Int Reduction==========================================
19655 
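// A reduction combines the scalar accumulator in src1 with every lane of src2; the
// lane folding itself is done by the macro-assembler helper (reduceI below), which
// uses vtmp1/vtmp2 as scratch vectors.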
19656 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19657   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19658   match(Set dst (AddReductionVI src1 src2));
19659   match(Set dst (MulReductionVI src1 src2));
19660   match(Set dst (AndReductionV  src1 src2));
19661   match(Set dst ( OrReductionV  src1 src2));
19662   match(Set dst (XorReductionV  src1 src2));
19663   match(Set dst (MinReductionV  src1 src2));
19664   match(Set dst (MaxReductionV  src1 src2));
19665   effect(TEMP vtmp1, TEMP vtmp2);
19666   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19667   ins_encode %{
19668     int opcode = this->ideal_Opcode();
19669     int vlen = Matcher::vector_length(this, $src2);
19670     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19671   %}
19672   ins_pipe( pipe_slow );
19673 %}
19674 
19675 // =======================Long Reduction==========================================
19676 
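// Two variants: without AVX-512DQ the operands stay in the legacy XMM0-XMM15 range
// (legVec); with AVX-512DQ the full EVEX register file (vec) is available.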
19677 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19678   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19679   match(Set dst (AddReductionVL src1 src2));
19680   match(Set dst (MulReductionVL src1 src2));
19681   match(Set dst (AndReductionV  src1 src2));
19682   match(Set dst ( OrReductionV  src1 src2));
19683   match(Set dst (XorReductionV  src1 src2));
19684   match(Set dst (MinReductionV  src1 src2));
19685   match(Set dst (MaxReductionV  src1 src2));
19686   effect(TEMP vtmp1, TEMP vtmp2);
19687   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19688   ins_encode %{
19689     int opcode = this->ideal_Opcode();
19690     int vlen = Matcher::vector_length(this, $src2);
19691     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19692   %}
19693   ins_pipe( pipe_slow );
19694 %}
19695 
19696 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19697   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19698   match(Set dst (AddReductionVL src1 src2));
19699   match(Set dst (MulReductionVL src1 src2));
19700   match(Set dst (AndReductionV  src1 src2));
19701   match(Set dst ( OrReductionV  src1 src2));
19702   match(Set dst (XorReductionV  src1 src2));
19703   match(Set dst (MinReductionV  src1 src2));
19704   match(Set dst (MaxReductionV  src1 src2));
19705   effect(TEMP vtmp1, TEMP vtmp2);
19706   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19707   ins_encode %{
19708     int opcode = this->ideal_Opcode();
19709     int vlen = Matcher::vector_length(this, $src2);
19710     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19711   %}
19712   ins_pipe( pipe_slow );
19713 %}
19714 
19715 // =======================Float Reduction==========================================
19716 
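// Strictly ordered float reductions: floating-point add/mul is not associative, so
// the lanes are accumulated in order into dst, which serves as both the incoming
// scalar value and the result.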
19717 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19718   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19719   match(Set dst (AddReductionVF dst src));
19720   match(Set dst (MulReductionVF dst src));
19721   effect(TEMP dst, TEMP vtmp);
19722   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19723   ins_encode %{
19724     int opcode = this->ideal_Opcode();
19725     int vlen = Matcher::vector_length(this, $src);
19726     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19727   %}
19728   ins_pipe( pipe_slow );
19729 %}
19730 
19731 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19732   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19733   match(Set dst (AddReductionVF dst src));
19734   match(Set dst (MulReductionVF dst src));
19735   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19736   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19737   ins_encode %{
19738     int opcode = this->ideal_Opcode();
19739     int vlen = Matcher::vector_length(this, $src);
19740     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19741   %}
19742   ins_pipe( pipe_slow );
19743 %}
19744 
19745 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19746   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19747   match(Set dst (AddReductionVF dst src));
19748   match(Set dst (MulReductionVF dst src));
19749   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19750   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19751   ins_encode %{
19752     int opcode = this->ideal_Opcode();
19753     int vlen = Matcher::vector_length(this, $src);
19754     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19755   %}
19756   ins_pipe( pipe_slow );
19757 %}
19758 
19759 
19760 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19761   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19762   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19763   // src1 contains reduction identity
19764   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19765   match(Set dst (AddReductionVF src1 src2));
19766   match(Set dst (MulReductionVF src1 src2));
19767   effect(TEMP dst);
19768   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19769   ins_encode %{
19770     int opcode = this->ideal_Opcode();
19771     int vlen = Matcher::vector_length(this, $src2);
19772     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19773   %}
19774   ins_pipe( pipe_slow );
19775 %}
19776 
19777 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19778   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19779   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19780   // src1 contains reduction identity
19781   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19782   match(Set dst (AddReductionVF src1 src2));
19783   match(Set dst (MulReductionVF src1 src2));
19784   effect(TEMP dst, TEMP vtmp);
19785   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19786   ins_encode %{
19787     int opcode = this->ideal_Opcode();
19788     int vlen = Matcher::vector_length(this, $src2);
19789     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19790   %}
19791   ins_pipe( pipe_slow );
19792 %}
19793 
19794 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19795   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19796   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19797   // src1 contains reduction identity
19798   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19799   match(Set dst (AddReductionVF src1 src2));
19800   match(Set dst (MulReductionVF src1 src2));
19801   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19802   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19803   ins_encode %{
19804     int opcode = this->ideal_Opcode();
19805     int vlen = Matcher::vector_length(this, $src2);
19806     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19807   %}
19808   ins_pipe( pipe_slow );
19809 %}
19810 
19811 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19812   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19813   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19814   // src1 contains reduction identity
19815   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19816   match(Set dst (AddReductionVF src1 src2));
19817   match(Set dst (MulReductionVF src1 src2));
19818   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19819   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19820   ins_encode %{
19821     int opcode = this->ideal_Opcode();
19822     int vlen = Matcher::vector_length(this, $src2);
19823     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19824   %}
19825   ins_pipe( pipe_slow );
19826 %}
19827 
19828 // =======================Double Reduction==========================================
19829 
19830 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19831   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19832   match(Set dst (AddReductionVD dst src));
19833   match(Set dst (MulReductionVD dst src));
19834   effect(TEMP dst, TEMP vtmp);
19835   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19836   ins_encode %{
19837     int opcode = this->ideal_Opcode();
19838     int vlen = Matcher::vector_length(this, $src);
19839     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19840   %}
19841   ins_pipe( pipe_slow );
19842 %}
19843 
19844 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19845   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19846   match(Set dst (AddReductionVD dst src));
19847   match(Set dst (MulReductionVD dst src));
19848   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19849   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19850   ins_encode %{
19851     int opcode = this->ideal_Opcode();
19852     int vlen = Matcher::vector_length(this, $src);
19853     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19854   %}
19855   ins_pipe( pipe_slow );
19856 %}
19857 
19858 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19859   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19860   match(Set dst (AddReductionVD dst src));
19861   match(Set dst (MulReductionVD dst src));
19862   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19863   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19864   ins_encode %{
19865     int opcode = this->ideal_Opcode();
19866     int vlen = Matcher::vector_length(this, $src);
19867     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19868   %}
19869   ins_pipe( pipe_slow );
19870 %}
19871 
19872 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19873   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19874   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19875   // src1 contains reduction identity
19876   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19877   match(Set dst (AddReductionVD src1 src2));
19878   match(Set dst (MulReductionVD src1 src2));
19879   effect(TEMP dst);
19880   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19881   ins_encode %{
19882     int opcode = this->ideal_Opcode();
19883     int vlen = Matcher::vector_length(this, $src2);
19884     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19885   %}
19886   ins_pipe( pipe_slow );
19887 %}
19888 
19889 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19890   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19891   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19892   // src1 contains reduction identity
19893   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19894   match(Set dst (AddReductionVD src1 src2));
19895   match(Set dst (MulReductionVD src1 src2));
19896   effect(TEMP dst, TEMP vtmp);
19897   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19898   ins_encode %{
19899     int opcode = this->ideal_Opcode();
19900     int vlen = Matcher::vector_length(this, $src2);
19901     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19902   %}
19903   ins_pipe( pipe_slow );
19904 %}
19905 
19906 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19907   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19908   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19909   // src1 contains reduction identity
19910   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19911   match(Set dst (AddReductionVD src1 src2));
19912   match(Set dst (MulReductionVD src1 src2));
19913   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19914   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19915   ins_encode %{
19916     int opcode = this->ideal_Opcode();
19917     int vlen = Matcher::vector_length(this, $src2);
19918     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19919   %}
19920   ins_pipe( pipe_slow );
19921 %}
19922 
19923 // =======================Byte Reduction==========================================
19924 
19925 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19926   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19927   match(Set dst (AddReductionVI src1 src2));
19928   match(Set dst (AndReductionV  src1 src2));
19929   match(Set dst ( OrReductionV  src1 src2));
19930   match(Set dst (XorReductionV  src1 src2));
19931   match(Set dst (MinReductionV  src1 src2));
19932   match(Set dst (MaxReductionV  src1 src2));
19933   effect(TEMP vtmp1, TEMP vtmp2);
19934   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19935   ins_encode %{
19936     int opcode = this->ideal_Opcode();
19937     int vlen = Matcher::vector_length(this, $src2);
19938     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19939   %}
19940   ins_pipe( pipe_slow );
19941 %}
19942 
19943 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19944   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19945   match(Set dst (AddReductionVI src1 src2));
19946   match(Set dst (AndReductionV  src1 src2));
19947   match(Set dst ( OrReductionV  src1 src2));
19948   match(Set dst (XorReductionV  src1 src2));
19949   match(Set dst (MinReductionV  src1 src2));
19950   match(Set dst (MaxReductionV  src1 src2));
19951   effect(TEMP vtmp1, TEMP vtmp2);
19952   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19953   ins_encode %{
19954     int opcode = this->ideal_Opcode();
19955     int vlen = Matcher::vector_length(this, $src2);
19956     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19957   %}
19958   ins_pipe( pipe_slow );
19959 %}
19960 
19961 // =======================Short Reduction==========================================
19962 
19963 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19964   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19965   match(Set dst (AddReductionVI src1 src2));
19966   match(Set dst (MulReductionVI src1 src2));
19967   match(Set dst (AndReductionV  src1 src2));
19968   match(Set dst ( OrReductionV  src1 src2));
19969   match(Set dst (XorReductionV  src1 src2));
19970   match(Set dst (MinReductionV  src1 src2));
19971   match(Set dst (MaxReductionV  src1 src2));
19972   effect(TEMP vtmp1, TEMP vtmp2);
19973   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19974   ins_encode %{
19975     int opcode = this->ideal_Opcode();
19976     int vlen = Matcher::vector_length(this, $src2);
19977     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19978   %}
19979   ins_pipe( pipe_slow );
19980 %}
19981 
19982 // =======================Mul Reduction==========================================
19983 
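// Byte multiply reductions: vectors of up to 32 bytes share one rule, while the
// 64-byte case is split out and kept in the legacy register range (legVec operands).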
19984 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19985   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19986             Matcher::vector_length(n->in(2)) <= 32); // src2
19987   match(Set dst (MulReductionVI src1 src2));
19988   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19989   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19990   ins_encode %{
19991     int opcode = this->ideal_Opcode();
19992     int vlen = Matcher::vector_length(this, $src2);
19993     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19994   %}
19995   ins_pipe( pipe_slow );
19996 %}
19997 
19998 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19999   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20000             Matcher::vector_length(n->in(2)) == 64); // src2
20001   match(Set dst (MulReductionVI src1 src2));
20002   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20003   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20004   ins_encode %{
20005     int opcode = this->ideal_Opcode();
20006     int vlen = Matcher::vector_length(this, $src2);
20007     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20008   %}
20009   ins_pipe( pipe_slow );
20010 %}
20011 
20012 //--------------------Min/Max Float Reduction --------------------
20013 // Float Min/Max Reduction
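// Min/max reductions must follow Java Math.min/max semantics for NaN and -0.0, so
// the pre-AVX10.2 rules below need several vector temporaries and clobber the
// condition flags; the AVX10.2 variants further down get by with one or two
// scratch vectors.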
20014 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20015                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20016   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20017             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20018              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20019             Matcher::vector_length(n->in(2)) == 2);
20020   match(Set dst (MinReductionV src1 src2));
20021   match(Set dst (MaxReductionV src1 src2));
20022   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20023   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20024   ins_encode %{
20025     assert(UseAVX > 0, "sanity");
20026 
20027     int opcode = this->ideal_Opcode();
20028     int vlen = Matcher::vector_length(this, $src2);
20029     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20030                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20031   %}
20032   ins_pipe( pipe_slow );
20033 %}
20034 
20035 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20036                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20037   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20038             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20039              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20040             Matcher::vector_length(n->in(2)) >= 4);
20041   match(Set dst (MinReductionV src1 src2));
20042   match(Set dst (MaxReductionV src1 src2));
20043   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20044   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20045   ins_encode %{
20046     assert(UseAVX > 0, "sanity");
20047 
20048     int opcode = this->ideal_Opcode();
20049     int vlen = Matcher::vector_length(this, $src2);
20050     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20051                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20052   %}
20053   ins_pipe( pipe_slow );
20054 %}
20055 
20056 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
20057                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20058   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20059             Matcher::vector_length(n->in(2)) == 2);
20060   match(Set dst (MinReductionV dst src));
20061   match(Set dst (MaxReductionV dst src));
20062   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20063   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20064   ins_encode %{
20065     assert(UseAVX > 0, "sanity");
20066 
20067     int opcode = this->ideal_Opcode();
20068     int vlen = Matcher::vector_length(this, $src);
20069     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20070                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20071   %}
20072   ins_pipe( pipe_slow );
20073 %}
20074 
20075 
20076 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
20077                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20078   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20079             Matcher::vector_length(n->in(2)) >= 4);
20080   match(Set dst (MinReductionV dst src));
20081   match(Set dst (MaxReductionV dst src));
20082   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20083   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20084   ins_encode %{
20085     assert(UseAVX > 0, "sanity");
20086 
20087     int opcode = this->ideal_Opcode();
20088     int vlen = Matcher::vector_length(this, $src);
20089     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20090                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20091   %}
20092   ins_pipe( pipe_slow );
20093 %}
20094 
20095 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
20096   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20097             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20098              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20099             Matcher::vector_length(n->in(2)) == 2);
20100   match(Set dst (MinReductionV src1 src2));
20101   match(Set dst (MaxReductionV src1 src2));
20102   effect(TEMP dst, TEMP xtmp1);
20103   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
20104   ins_encode %{
20105     int opcode = this->ideal_Opcode();
20106     int vlen = Matcher::vector_length(this, $src2);
20107     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20108                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20109   %}
20110   ins_pipe( pipe_slow );
20111 %}
20112 
20113 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
20114   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20115             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20116              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20117             Matcher::vector_length(n->in(2)) >= 4);
20118   match(Set dst (MinReductionV src1 src2));
20119   match(Set dst (MaxReductionV src1 src2));
20120   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20121   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20122   ins_encode %{
20123     int opcode = this->ideal_Opcode();
20124     int vlen = Matcher::vector_length(this, $src2);
20125     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20126                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20127   %}
20128   ins_pipe( pipe_slow );
20129 %}
20130 
20131 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20132   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20133             Matcher::vector_length(n->in(2)) == 2);
20134   match(Set dst (MinReductionV dst src));
20135   match(Set dst (MaxReductionV dst src));
20136   effect(TEMP dst, TEMP xtmp1);
20137   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20138   ins_encode %{
20139     int opcode = this->ideal_Opcode();
20140     int vlen = Matcher::vector_length(this, $src);
20141     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20142                          $xtmp1$$XMMRegister);
20143   %}
20144   ins_pipe( pipe_slow );
20145 %}
20146 
20147 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20148   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20149             Matcher::vector_length(n->in(2)) >= 4);
20150   match(Set dst (MinReductionV dst src));
20151   match(Set dst (MaxReductionV dst src));
20152   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20153   format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20154   ins_encode %{
20155     int opcode = this->ideal_Opcode();
20156     int vlen = Matcher::vector_length(this, $src);
20157     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20158                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20159   %}
20160   ins_pipe( pipe_slow );
20161 %}
20162 
20163 //--------------------Min/Max Double Reduction --------------------
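// Same structure as the float min/max reductions above, specialized for doubles.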
20164 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20165                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20166   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20167             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20168              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20169             Matcher::vector_length(n->in(2)) == 2);
20170   match(Set dst (MinReductionV src1 src2));
20171   match(Set dst (MaxReductionV src1 src2));
20172   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20173   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20174   ins_encode %{
20175     assert(UseAVX > 0, "sanity");
20176 
20177     int opcode = this->ideal_Opcode();
20178     int vlen = Matcher::vector_length(this, $src2);
20179     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20180                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20181   %}
20182   ins_pipe( pipe_slow );
20183 %}
20184 
20185 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20186                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20187   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20188             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20189              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20190             Matcher::vector_length(n->in(2)) >= 4);
20191   match(Set dst (MinReductionV src1 src2));
20192   match(Set dst (MaxReductionV src1 src2));
20193   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20194   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20195   ins_encode %{
20196     assert(UseAVX > 0, "sanity");
20197 
20198     int opcode = this->ideal_Opcode();
20199     int vlen = Matcher::vector_length(this, $src2);
20200     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20201                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20202   %}
20203   ins_pipe( pipe_slow );
20204 %}
20205 
20206 
20207 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20208                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20209   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20210             Matcher::vector_length(n->in(2)) == 2);
20211   match(Set dst (MinReductionV dst src));
20212   match(Set dst (MaxReductionV dst src));
20213   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20214   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20215   ins_encode %{
20216     assert(UseAVX > 0, "sanity");
20217 
20218     int opcode = this->ideal_Opcode();
20219     int vlen = Matcher::vector_length(this, $src);
20220     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20221                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20222   %}
20223   ins_pipe( pipe_slow );
20224 %}
20225 
20226 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20227                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20228   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20229             Matcher::vector_length(n->in(2)) >= 4);
20230   match(Set dst (MinReductionV dst src));
20231   match(Set dst (MaxReductionV dst src));
20232   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20233   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20234   ins_encode %{
20235     assert(UseAVX > 0, "sanity");
20236 
20237     int opcode = this->ideal_Opcode();
20238     int vlen = Matcher::vector_length(this, $src);
20239     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20240                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20241   %}
20242   ins_pipe( pipe_slow );
20243 %}
20244 
20245 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20246   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20247             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20248              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20249             Matcher::vector_length(n->in(2)) == 2);
20250   match(Set dst (MinReductionV src1 src2));
20251   match(Set dst (MaxReductionV src1 src2));
20252   effect(TEMP dst, TEMP xtmp1);
20253   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20254   ins_encode %{
20255     int opcode = this->ideal_Opcode();
20256     int vlen = Matcher::vector_length(this, $src2);
20257     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20258                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20259   %}
20260   ins_pipe( pipe_slow );
20261 %}
20262 
20263 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20264   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20265             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20266              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20267             Matcher::vector_length(n->in(2)) >= 4);
20268   match(Set dst (MinReductionV src1 src2));
20269   match(Set dst (MaxReductionV src1 src2));
20270   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20271   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20272   ins_encode %{
20273     int opcode = this->ideal_Opcode();
20274     int vlen = Matcher::vector_length(this, $src2);
20275     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20276                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20277   %}
20278   ins_pipe( pipe_slow );
20279 %}
20280 
20281 
20282 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20283   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20284             Matcher::vector_length(n->in(2)) == 2);
20285   match(Set dst (MinReductionV dst src));
20286   match(Set dst (MaxReductionV dst src));
20287   effect(TEMP dst, TEMP xtmp1);
20288   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20289   ins_encode %{
20290     int opcode = this->ideal_Opcode();
20291     int vlen = Matcher::vector_length(this, $src);
20292     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20293                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20294   %}
20295   ins_pipe( pipe_slow );
20296 %}
20297 
20298 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20299   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20300             Matcher::vector_length(n->in(2)) >= 4);
20301   match(Set dst (MinReductionV dst src));
20302   match(Set dst (MaxReductionV dst src));
20303   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20304   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20305   ins_encode %{
20306     int opcode = this->ideal_Opcode();
20307     int vlen = Matcher::vector_length(this, $src);
20308     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20309                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20310   %}
20311   ins_pipe( pipe_slow );
20312 %}
20313 
20314 // ====================VECTOR ARITHMETIC=======================================
20315 
20316 // --------------------------------- ADD --------------------------------------
20317 
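// Each element type follows the same three-rule pattern: a destructive two-operand
// SSE form for UseAVX == 0 (dst = dst op src), a three-operand AVX/EVEX form, and a
// memory-operand form that folds the vector load when the vector is wider than 8 bytes.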
20318 // Bytes vector add
20319 instruct vaddB(vec dst, vec src) %{
20320   predicate(UseAVX == 0);
20321   match(Set dst (AddVB dst src));
20322   format %{ "paddb   $dst,$src\t! add packedB" %}
20323   ins_encode %{
20324     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20325   %}
20326   ins_pipe( pipe_slow );
20327 %}
20328 
20329 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20330   predicate(UseAVX > 0);
20331   match(Set dst (AddVB src1 src2));
20332   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20333   ins_encode %{
20334     int vlen_enc = vector_length_encoding(this);
20335     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20336   %}
20337   ins_pipe( pipe_slow );
20338 %}
20339 
20340 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20341   predicate((UseAVX > 0) &&
20342             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20343   match(Set dst (AddVB src (LoadVector mem)));
20344   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20345   ins_encode %{
20346     int vlen_enc = vector_length_encoding(this);
20347     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20348   %}
20349   ins_pipe( pipe_slow );
20350 %}
20351 
20352 // Shorts/Chars vector add
20353 instruct vaddS(vec dst, vec src) %{
20354   predicate(UseAVX == 0);
20355   match(Set dst (AddVS dst src));
20356   format %{ "paddw   $dst,$src\t! add packedS" %}
20357   ins_encode %{
20358     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20359   %}
20360   ins_pipe( pipe_slow );
20361 %}
20362 
20363 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20364   predicate(UseAVX > 0);
20365   match(Set dst (AddVS src1 src2));
20366   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20367   ins_encode %{
20368     int vlen_enc = vector_length_encoding(this);
20369     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20370   %}
20371   ins_pipe( pipe_slow );
20372 %}
20373 
20374 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20375   predicate((UseAVX > 0) &&
20376             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20377   match(Set dst (AddVS src (LoadVector mem)));
20378   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20379   ins_encode %{
20380     int vlen_enc = vector_length_encoding(this);
20381     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20382   %}
20383   ins_pipe( pipe_slow );
20384 %}
20385 
20386 // Integers vector add
20387 instruct vaddI(vec dst, vec src) %{
20388   predicate(UseAVX == 0);
20389   match(Set dst (AddVI dst src));
20390   format %{ "paddd   $dst,$src\t! add packedI" %}
20391   ins_encode %{
20392     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20393   %}
20394   ins_pipe( pipe_slow );
20395 %}
20396 
20397 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20398   predicate(UseAVX > 0);
20399   match(Set dst (AddVI src1 src2));
20400   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20401   ins_encode %{
20402     int vlen_enc = vector_length_encoding(this);
20403     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20404   %}
20405   ins_pipe( pipe_slow );
20406 %}
20407 
20408 
20409 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20410   predicate((UseAVX > 0) &&
20411             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20412   match(Set dst (AddVI src (LoadVector mem)));
20413   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20414   ins_encode %{
20415     int vlen_enc = vector_length_encoding(this);
20416     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20417   %}
20418   ins_pipe( pipe_slow );
20419 %}
20420 
20421 // Longs vector add
20422 instruct vaddL(vec dst, vec src) %{
20423   predicate(UseAVX == 0);
20424   match(Set dst (AddVL dst src));
20425   format %{ "paddq   $dst,$src\t! add packedL" %}
20426   ins_encode %{
20427     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20428   %}
20429   ins_pipe( pipe_slow );
20430 %}
20431 
20432 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20433   predicate(UseAVX > 0);
20434   match(Set dst (AddVL src1 src2));
20435   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20436   ins_encode %{
20437     int vlen_enc = vector_length_encoding(this);
20438     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20439   %}
20440   ins_pipe( pipe_slow );
20441 %}
20442 
20443 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20444   predicate((UseAVX > 0) &&
20445             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20446   match(Set dst (AddVL src (LoadVector mem)));
20447   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20448   ins_encode %{
20449     int vlen_enc = vector_length_encoding(this);
20450     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20451   %}
20452   ins_pipe( pipe_slow );
20453 %}
20454 
20455 // Floats vector add
20456 instruct vaddF(vec dst, vec src) %{
20457   predicate(UseAVX == 0);
20458   match(Set dst (AddVF dst src));
20459   format %{ "addps   $dst,$src\t! add packedF" %}
20460   ins_encode %{
20461     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20462   %}
20463   ins_pipe( pipe_slow );
20464 %}
20465 
20466 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20467   predicate(UseAVX > 0);
20468   match(Set dst (AddVF src1 src2));
20469   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20470   ins_encode %{
20471     int vlen_enc = vector_length_encoding(this);
20472     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20473   %}
20474   ins_pipe( pipe_slow );
20475 %}
20476 
20477 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20478   predicate((UseAVX > 0) &&
20479             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20480   match(Set dst (AddVF src (LoadVector mem)));
20481   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20482   ins_encode %{
20483     int vlen_enc = vector_length_encoding(this);
20484     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20485   %}
20486   ins_pipe( pipe_slow );
20487 %}
20488 
20489 // Doubles vector add
20490 instruct vaddD(vec dst, vec src) %{
20491   predicate(UseAVX == 0);
20492   match(Set dst (AddVD dst src));
20493   format %{ "addpd   $dst,$src\t! add packedD" %}
20494   ins_encode %{
20495     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20496   %}
20497   ins_pipe( pipe_slow );
20498 %}
20499 
20500 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20501   predicate(UseAVX > 0);
20502   match(Set dst (AddVD src1 src2));
20503   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20504   ins_encode %{
20505     int vlen_enc = vector_length_encoding(this);
20506     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20507   %}
20508   ins_pipe( pipe_slow );
20509 %}
20510 
20511 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20512   predicate((UseAVX > 0) &&
20513             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20514   match(Set dst (AddVD src (LoadVector mem)));
20515   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20516   ins_encode %{
20517     int vlen_enc = vector_length_encoding(this);
20518     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20519   %}
20520   ins_pipe( pipe_slow );
20521 %}
20522 
20523 // --------------------------------- SUB --------------------------------------
20524 
20525 // Bytes vector sub
20526 instruct vsubB(vec dst, vec src) %{
20527   predicate(UseAVX == 0);
20528   match(Set dst (SubVB dst src));
20529   format %{ "psubb   $dst,$src\t! sub packedB" %}
20530   ins_encode %{
20531     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20532   %}
20533   ins_pipe( pipe_slow );
20534 %}
20535 
20536 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20537   predicate(UseAVX > 0);
20538   match(Set dst (SubVB src1 src2));
20539   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20540   ins_encode %{
20541     int vlen_enc = vector_length_encoding(this);
20542     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20543   %}
20544   ins_pipe( pipe_slow );
20545 %}
20546 
20547 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20548   predicate((UseAVX > 0) &&
20549             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20550   match(Set dst (SubVB src (LoadVector mem)));
20551   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20552   ins_encode %{
20553     int vlen_enc = vector_length_encoding(this);
20554     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20555   %}
20556   ins_pipe( pipe_slow );
20557 %}
20558 
20559 // Shorts/Chars vector sub
20560 instruct vsubS(vec dst, vec src) %{
20561   predicate(UseAVX == 0);
20562   match(Set dst (SubVS dst src));
20563   format %{ "psubw   $dst,$src\t! sub packedS" %}
20564   ins_encode %{
20565     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20566   %}
20567   ins_pipe( pipe_slow );
20568 %}
20569 
20570 
20571 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20572   predicate(UseAVX > 0);
20573   match(Set dst (SubVS src1 src2));
20574   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20575   ins_encode %{
20576     int vlen_enc = vector_length_encoding(this);
20577     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20578   %}
20579   ins_pipe( pipe_slow );
20580 %}
20581 
20582 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20583   predicate((UseAVX > 0) &&
20584             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20585   match(Set dst (SubVS src (LoadVector mem)));
20586   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20587   ins_encode %{
20588     int vlen_enc = vector_length_encoding(this);
20589     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20590   %}
20591   ins_pipe( pipe_slow );
20592 %}
20593 
20594 // Integers vector sub
20595 instruct vsubI(vec dst, vec src) %{
20596   predicate(UseAVX == 0);
20597   match(Set dst (SubVI dst src));
20598   format %{ "psubd   $dst,$src\t! sub packedI" %}
20599   ins_encode %{
20600     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20601   %}
20602   ins_pipe( pipe_slow );
20603 %}
20604 
20605 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20606   predicate(UseAVX > 0);
20607   match(Set dst (SubVI src1 src2));
20608   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20609   ins_encode %{
20610     int vlen_enc = vector_length_encoding(this);
20611     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20612   %}
20613   ins_pipe( pipe_slow );
20614 %}
20615 
20616 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20617   predicate((UseAVX > 0) &&
20618             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20619   match(Set dst (SubVI src (LoadVector mem)));
20620   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20621   ins_encode %{
20622     int vlen_enc = vector_length_encoding(this);
20623     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20624   %}
20625   ins_pipe( pipe_slow );
20626 %}
20627 
20628 // Longs vector sub
20629 instruct vsubL(vec dst, vec src) %{
20630   predicate(UseAVX == 0);
20631   match(Set dst (SubVL dst src));
20632   format %{ "psubq   $dst,$src\t! sub packedL" %}
20633   ins_encode %{
20634     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20635   %}
20636   ins_pipe( pipe_slow );
20637 %}
20638 
20639 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20640   predicate(UseAVX > 0);
20641   match(Set dst (SubVL src1 src2));
20642   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20643   ins_encode %{
20644     int vlen_enc = vector_length_encoding(this);
20645     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20646   %}
20647   ins_pipe( pipe_slow );
20648 %}
20649 
20650 
20651 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20652   predicate((UseAVX > 0) &&
20653             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20654   match(Set dst (SubVL src (LoadVector mem)));
20655   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20656   ins_encode %{
20657     int vlen_enc = vector_length_encoding(this);
20658     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20659   %}
20660   ins_pipe( pipe_slow );
20661 %}
20662 
20663 // Floats vector sub
20664 instruct vsubF(vec dst, vec src) %{
20665   predicate(UseAVX == 0);
20666   match(Set dst (SubVF dst src));
20667   format %{ "subps   $dst,$src\t! sub packedF" %}
20668   ins_encode %{
20669     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20670   %}
20671   ins_pipe( pipe_slow );
20672 %}
20673 
20674 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20675   predicate(UseAVX > 0);
20676   match(Set dst (SubVF src1 src2));
20677   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20678   ins_encode %{
20679     int vlen_enc = vector_length_encoding(this);
20680     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20681   %}
20682   ins_pipe( pipe_slow );
20683 %}
20684 
20685 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20686   predicate((UseAVX > 0) &&
20687             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20688   match(Set dst (SubVF src (LoadVector mem)));
20689   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20690   ins_encode %{
20691     int vlen_enc = vector_length_encoding(this);
20692     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20693   %}
20694   ins_pipe( pipe_slow );
20695 %}
20696 
20697 // Doubles vector sub
20698 instruct vsubD(vec dst, vec src) %{
20699   predicate(UseAVX == 0);
20700   match(Set dst (SubVD dst src));
20701   format %{ "subpd   $dst,$src\t! sub packedD" %}
20702   ins_encode %{
20703     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20704   %}
20705   ins_pipe( pipe_slow );
20706 %}
20707 
20708 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20709   predicate(UseAVX > 0);
20710   match(Set dst (SubVD src1 src2));
20711   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20712   ins_encode %{
20713     int vlen_enc = vector_length_encoding(this);
20714     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20715   %}
20716   ins_pipe( pipe_slow );
20717 %}
20718 
20719 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20720   predicate((UseAVX > 0) &&
20721             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20722   match(Set dst (SubVD src (LoadVector mem)));
20723   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20724   ins_encode %{
20725     int vlen_enc = vector_length_encoding(this);
20726     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20727   %}
20728   ins_pipe( pipe_slow );
20729 %}
20730 
20731 // --------------------------------- MUL --------------------------------------
20732 
20733 // Byte vector mul
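// x86 has no byte multiply instruction, so bytes are widened to words, multiplied
// with pmullw/vpmullw, and the low bytes of the products are recombined.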
20734 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20735   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20736   match(Set dst (MulVB src1 src2));
20737   effect(TEMP dst, TEMP xtmp);
20738   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20739   ins_encode %{
20740     assert(UseSSE > 3, "required");
20741     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20742     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20743     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20744     __ psllw($dst$$XMMRegister, 8);
20745     __ psrlw($dst$$XMMRegister, 8);
20746     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20747   %}
20748   ins_pipe( pipe_slow );
20749 %}
20750 
20751 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20752   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20753   match(Set dst (MulVB src1 src2));
20754   effect(TEMP dst, TEMP xtmp);
20755   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20756   ins_encode %{
20757     assert(UseSSE > 3, "required");
20758     // Odd-index elements
20759     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20760     __ psrlw($dst$$XMMRegister, 8);
20761     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20762     __ psrlw($xtmp$$XMMRegister, 8);
20763     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20764     __ psllw($dst$$XMMRegister, 8);
20765     // Even-index elements
20766     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20767     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20768     __ psllw($xtmp$$XMMRegister, 8);
20769     __ psrlw($xtmp$$XMMRegister, 8);
20770     // Combine
20771     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20772   %}
20773   ins_pipe( pipe_slow );
20774 %}
20775 
20776 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20777   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20778   match(Set dst (MulVB src1 src2));
20779   effect(TEMP xtmp1, TEMP xtmp2);
20780   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20781   ins_encode %{
20782     int vlen_enc = vector_length_encoding(this);
20783     // Odd-index elements
20784     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20785     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20786     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20787     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20788     // Even-index elements
20789     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20790     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20791     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20792     // Combine
20793     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20794   %}
20795   ins_pipe( pipe_slow );
20796 %}
20797 
20798 // Shorts/Chars vector mul
20799 instruct vmulS(vec dst, vec src) %{
20800   predicate(UseAVX == 0);
20801   match(Set dst (MulVS dst src));
20802   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20803   ins_encode %{
20804     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20805   %}
20806   ins_pipe( pipe_slow );
20807 %}
20808 
20809 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20810   predicate(UseAVX > 0);
20811   match(Set dst (MulVS src1 src2));
20812   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20813   ins_encode %{
20814     int vlen_enc = vector_length_encoding(this);
20815     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20816   %}
20817   ins_pipe( pipe_slow );
20818 %}
20819 
20820 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20821   predicate((UseAVX > 0) &&
20822             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20823   match(Set dst (MulVS src (LoadVector mem)));
20824   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20825   ins_encode %{
20826     int vlen_enc = vector_length_encoding(this);
20827     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20828   %}
20829   ins_pipe( pipe_slow );
20830 %}
20831 
20832 // Integers vector mul
20833 instruct vmulI(vec dst, vec src) %{
20834   predicate(UseAVX == 0);
20835   match(Set dst (MulVI dst src));
20836   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20837   ins_encode %{
20838     assert(UseSSE > 3, "required");
20839     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20840   %}
20841   ins_pipe( pipe_slow );
20842 %}
20843 
20844 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20845   predicate(UseAVX > 0);
20846   match(Set dst (MulVI src1 src2));
20847   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20848   ins_encode %{
20849     int vlen_enc = vector_length_encoding(this);
20850     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20851   %}
20852   ins_pipe( pipe_slow );
20853 %}
20854 
20855 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20856   predicate((UseAVX > 0) &&
20857             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20858   match(Set dst (MulVI src (LoadVector mem)));
20859   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20860   ins_encode %{
20861     int vlen_enc = vector_length_encoding(this);
20862     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20863   %}
20864   ins_pipe( pipe_slow );
20865 %}
20866 
20867 // Longs vector mul
20868 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20869   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20870              VM_Version::supports_avx512dq()) ||
20871             VM_Version::supports_avx512vldq());
20872   match(Set dst (MulVL src1 src2));
20873   ins_cost(500);
20874   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20875   ins_encode %{
20876     assert(UseAVX > 2, "required");
20877     int vlen_enc = vector_length_encoding(this);
20878     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20879   %}
20880   ins_pipe( pipe_slow );
20881 %}
20882 
20883 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20884   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20885              VM_Version::supports_avx512dq()) ||
20886             (Matcher::vector_length_in_bytes(n) > 8 &&
20887              VM_Version::supports_avx512vldq()));
20888   match(Set dst (MulVL src (LoadVector mem)));
20889   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20890   ins_cost(500);
20891   ins_encode %{
20892     assert(UseAVX > 2, "required");
20893     int vlen_enc = vector_length_encoding(this);
20894     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20895   %}
20896   ins_pipe( pipe_slow );
20897 %}
20898 
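// Without AVX-512DQ there is no packed 64x64->64 multiply, so vmulL/vmulL_reg
// below compose it from 32-bit multiplies. Writing each lane as
// a = aH*2^32 + aL and b = bH*2^32 + bL, the low 64 bits of the product are
//
//   a*b mod 2^64 = ((aL*bH + aH*bL) << 32) + aL*bL
//
// The "lo-hi" step forms aL*bH + aH*bL with pmulld (only the low 32 bits of
// those partial products matter once shifted left by 32), and the "lo-lo" step
// adds the full 64-bit unsigned product aL*bL via pmuludq.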
20899 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20900   predicate(UseAVX == 0);
20901   match(Set dst (MulVL src1 src2));
20902   ins_cost(500);
20903   effect(TEMP dst, TEMP xtmp);
20904   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20905   ins_encode %{
20906     assert(VM_Version::supports_sse4_1(), "required");
20907     // Get the lo-hi products; only the lower 32 bits are of concern
20908     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20909     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20910     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20911     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20912     __ psllq($dst$$XMMRegister, 32);
20913     // Get the lo-lo products
20914     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20915     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20916     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20917   %}
20918   ins_pipe( pipe_slow );
20919 %}
20920 
20921 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20922   predicate(UseAVX > 0 &&
20923             ((Matcher::vector_length_in_bytes(n) == 64 &&
20924               !VM_Version::supports_avx512dq()) ||
20925              (Matcher::vector_length_in_bytes(n) < 64 &&
20926               !VM_Version::supports_avx512vldq())));
20927   match(Set dst (MulVL src1 src2));
20928   effect(TEMP xtmp1, TEMP xtmp2);
20929   ins_cost(500);
20930   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20931   ins_encode %{
20932     int vlen_enc = vector_length_encoding(this);
20933     // Get the lo-hi products; only the lower 32 bits are of concern
20934     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20935     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20936     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20937     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20938     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20939     // Get the lo-lo products
20940     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20941     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20942   %}
20943   ins_pipe( pipe_slow );
20944 %}
20945 
20946 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20947   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20948   match(Set dst (MulVL src1 src2));
20949   ins_cost(100);
20950   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20951   ins_encode %{
20952     int vlen_enc = vector_length_encoding(this);
20953     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20954   %}
20955   ins_pipe( pipe_slow );
20956 %}
20957 
20958 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20959   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20960   match(Set dst (MulVL src1 src2));
20961   ins_cost(100);
20962   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20963   ins_encode %{
20964     int vlen_enc = vector_length_encoding(this);
20965     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20966   %}
20967   ins_pipe( pipe_slow );
20968 %}
20969 
20970 // Floats vector mul
20971 instruct vmulF(vec dst, vec src) %{
20972   predicate(UseAVX == 0);
20973   match(Set dst (MulVF dst src));
20974   format %{ "mulps   $dst,$src\t! mul packedF" %}
20975   ins_encode %{
20976     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20977   %}
20978   ins_pipe( pipe_slow );
20979 %}
20980 
20981 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20982   predicate(UseAVX > 0);
20983   match(Set dst (MulVF src1 src2));
20984   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20985   ins_encode %{
20986     int vlen_enc = vector_length_encoding(this);
20987     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20988   %}
20989   ins_pipe( pipe_slow );
20990 %}
20991 
20992 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20993   predicate((UseAVX > 0) &&
20994             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20995   match(Set dst (MulVF src (LoadVector mem)));
20996   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20997   ins_encode %{
20998     int vlen_enc = vector_length_encoding(this);
20999     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21000   %}
21001   ins_pipe( pipe_slow );
21002 %}
21003 
21004 // Doubles vector mul
21005 instruct vmulD(vec dst, vec src) %{
21006   predicate(UseAVX == 0);
21007   match(Set dst (MulVD dst src));
21008   format %{ "mulpd   $dst,$src\t! mul packedD" %}
21009   ins_encode %{
21010     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
21011   %}
21012   ins_pipe( pipe_slow );
21013 %}
21014 
21015 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
21016   predicate(UseAVX > 0);
21017   match(Set dst (MulVD src1 src2));
21018   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
21019   ins_encode %{
21020     int vlen_enc = vector_length_encoding(this);
21021     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21022   %}
21023   ins_pipe( pipe_slow );
21024 %}
21025 
21026 instruct vmulD_mem(vec dst, vec src, memory mem) %{
21027   predicate((UseAVX > 0) &&
21028             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21029   match(Set dst (MulVD src (LoadVector mem)));
21030   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
21031   ins_encode %{
21032     int vlen_enc = vector_length_encoding(this);
21033     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21034   %}
21035   ins_pipe( pipe_slow );
21036 %}
21037 
21038 // --------------------------------- DIV --------------------------------------
21039 
21040 // Floats vector div
21041 instruct vdivF(vec dst, vec src) %{
21042   predicate(UseAVX == 0);
21043   match(Set dst (DivVF dst src));
21044   format %{ "divps   $dst,$src\t! div packedF" %}
21045   ins_encode %{
21046     __ divps($dst$$XMMRegister, $src$$XMMRegister);
21047   %}
21048   ins_pipe( pipe_slow );
21049 %}
21050 
21051 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
21052   predicate(UseAVX > 0);
21053   match(Set dst (DivVF src1 src2));
21054   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
21055   ins_encode %{
21056     int vlen_enc = vector_length_encoding(this);
21057     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21058   %}
21059   ins_pipe( pipe_slow );
21060 %}
21061 
21062 instruct vdivF_mem(vec dst, vec src, memory mem) %{
21063   predicate((UseAVX > 0) &&
21064             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21065   match(Set dst (DivVF src (LoadVector mem)));
21066   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
21067   ins_encode %{
21068     int vlen_enc = vector_length_encoding(this);
21069     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21070   %}
21071   ins_pipe( pipe_slow );
21072 %}
21073 
21074 // Doubles vector div
21075 instruct vdivD(vec dst, vec src) %{
21076   predicate(UseAVX == 0);
21077   match(Set dst (DivVD dst src));
21078   format %{ "divpd   $dst,$src\t! div packedD" %}
21079   ins_encode %{
21080     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
21081   %}
21082   ins_pipe( pipe_slow );
21083 %}
21084 
21085 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
21086   predicate(UseAVX > 0);
21087   match(Set dst (DivVD src1 src2));
21088   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
21089   ins_encode %{
21090     int vlen_enc = vector_length_encoding(this);
21091     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21092   %}
21093   ins_pipe( pipe_slow );
21094 %}
21095 
21096 instruct vdivD_mem(vec dst, vec src, memory mem) %{
21097   predicate((UseAVX > 0) &&
21098             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21099   match(Set dst (DivVD src (LoadVector mem)));
21100   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
21101   ins_encode %{
21102     int vlen_enc = vector_length_encoding(this);
21103     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21104   %}
21105   ins_pipe( pipe_slow );
21106 %}
21107 
21108 // ------------------------------ MinMax ---------------------------------------
21109 
21110 // Byte, Short, Int vector Min/Max
21111 instruct minmax_reg_sse(vec dst, vec src) %{
21112   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21113             UseAVX == 0);
21114   match(Set dst (MinV dst src));
21115   match(Set dst (MaxV dst src));
21116   format %{ "vector_minmax  $dst,$src\t!  " %}
21117   ins_encode %{
21118     assert(UseSSE >= 4, "required");
21119 
21120     int opcode = this->ideal_Opcode();
21121     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21122     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21123   %}
21124   ins_pipe( pipe_slow );
21125 %}
21126 
21127 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21128   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21129             UseAVX > 0);
21130   match(Set dst (MinV src1 src2));
21131   match(Set dst (MaxV src1 src2));
21132   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
21133   ins_encode %{
21134     int opcode = this->ideal_Opcode();
21135     int vlen_enc = vector_length_encoding(this);
21136     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21137 
21138     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21139   %}
21140   ins_pipe( pipe_slow );
21141 %}
21142 
21143 // Long vector Min/Max
21144 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21145   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21146             UseAVX == 0);
21147   match(Set dst (MinV dst src));
21148   match(Set dst (MaxV src dst));
21149   effect(TEMP dst, TEMP tmp);
21150   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
21151   ins_encode %{
21152     assert(UseSSE >= 4, "required");
21153 
21154     int opcode = this->ideal_Opcode();
21155     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21156     assert(elem_bt == T_LONG, "sanity");
21157 
21158     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21159   %}
21160   ins_pipe( pipe_slow );
21161 %}
21162 
21163 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21164   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21165             UseAVX > 0 && !VM_Version::supports_avx512vl());
21166   match(Set dst (MinV src1 src2));
21167   match(Set dst (MaxV src1 src2));
21168   effect(TEMP dst);
21169   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
21170   ins_encode %{
21171     int vlen_enc = vector_length_encoding(this);
21172     int opcode = this->ideal_Opcode();
21173     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21174     assert(elem_bt == T_LONG, "sanity");
21175 
21176     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21177   %}
21178   ins_pipe( pipe_slow );
21179 %}
21180 
21181 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21182   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21183             Matcher::vector_element_basic_type(n) == T_LONG);
21184   match(Set dst (MinV src1 src2));
21185   match(Set dst (MaxV src1 src2));
21186   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
21187   ins_encode %{
21188     assert(UseAVX > 2, "required");
21189 
21190     int vlen_enc = vector_length_encoding(this);
21191     int opcode = this->ideal_Opcode();
21192     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21193     assert(elem_bt == T_LONG, "sanity");
21194 
21195     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21196   %}
21197   ins_pipe( pipe_slow );
21198 %}
21199 
21200 // Float/Double vector Min/Max
21201 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21202   predicate(VM_Version::supports_avx10_2() &&
21203             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21204   match(Set dst (MinV a b));
21205   match(Set dst (MaxV a b));
21206   format %{ "vector_minmaxFP  $dst, $a, $b" %}
21207   ins_encode %{
21208     int vlen_enc = vector_length_encoding(this);
21209     int opcode = this->ideal_Opcode();
21210     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21211     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21212   %}
21213   ins_pipe( pipe_slow );
21214 %}
21215 
21216 // Float/Double vector Min/Max
21217 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21218   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21219             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21220             UseAVX > 0);
21221   match(Set dst (MinV a b));
21222   match(Set dst (MaxV a b));
21223   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21224   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21225   ins_encode %{
21226     assert(UseAVX > 0, "required");
21227 
21228     int opcode = this->ideal_Opcode();
21229     int vlen_enc = vector_length_encoding(this);
21230     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21231 
21232     __ vminmax_fp(opcode, elem_bt,
21233                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21234                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21235   %}
21236   ins_pipe( pipe_slow );
21237 %}
21238 
21239 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21240   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21241             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21242   match(Set dst (MinV a b));
21243   match(Set dst (MaxV a b));
21244   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21245   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21246   ins_encode %{
21247     assert(UseAVX > 2, "required");
21248 
21249     int opcode = this->ideal_Opcode();
21250     int vlen_enc = vector_length_encoding(this);
21251     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21252 
21253     __ evminmax_fp(opcode, elem_bt,
21254                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21255                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21256   %}
21257   ins_pipe( pipe_slow );
21258 %}
21259 
21260 // ------------------------------ Unsigned vector Min/Max ----------------------
21261 
21262 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21263   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21264   match(Set dst (UMinV a b));
21265   match(Set dst (UMaxV a b));
21266   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21267   ins_encode %{
21268     int opcode = this->ideal_Opcode();
21269     int vlen_enc = vector_length_encoding(this);
21270     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21271     assert(is_integral_type(elem_bt), "");
21272     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21273   %}
21274   ins_pipe( pipe_slow );
21275 %}
21276 
21277 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21278   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21279   match(Set dst (UMinV a (LoadVector b)));
21280   match(Set dst (UMaxV a (LoadVector b)));
21281   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21282   ins_encode %{
21283     int opcode = this->ideal_Opcode();
21284     int vlen_enc = vector_length_encoding(this);
21285     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21286     assert(is_integral_type(elem_bt), "");
21287     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21288   %}
21289   ins_pipe( pipe_slow );
21290 %}
21291 
21292 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21293   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21294   match(Set dst (UMinV a b));
21295   match(Set dst (UMaxV a b));
21296   effect(TEMP xtmp1, TEMP xtmp2);
21297   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21298   ins_encode %{
21299     int opcode = this->ideal_Opcode();
21300     int vlen_enc = vector_length_encoding(this);
21301     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21302   %}
21303   ins_pipe( pipe_slow );
21304 %}
21305 
21306 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21307   match(Set dst (UMinV (Binary dst src2) mask));
21308   match(Set dst (UMaxV (Binary dst src2) mask));
21309   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21310   ins_encode %{
21311     int vlen_enc = vector_length_encoding(this);
21312     BasicType bt = Matcher::vector_element_basic_type(this);
21313     int opc = this->ideal_Opcode();
21314     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21315                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21316   %}
21317   ins_pipe( pipe_slow );
21318 %}
21319 
21320 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21321   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21322   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21323   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21324   ins_encode %{
21325     int vlen_enc = vector_length_encoding(this);
21326     BasicType bt = Matcher::vector_element_basic_type(this);
21327     int opc = this->ideal_Opcode();
21328     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21329                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21330   %}
21331   ins_pipe( pipe_slow );
21332 %}
21333 
21334 // --------------------------------- Signum/CopySign ---------------------------
21335 
21336 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21337   match(Set dst (SignumF dst (Binary zero one)));
21338   effect(KILL cr);
21339   format %{ "signumF $dst, $dst" %}
21340   ins_encode %{
21341     int opcode = this->ideal_Opcode();
21342     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21343   %}
21344   ins_pipe( pipe_slow );
21345 %}
21346 
21347 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21348   match(Set dst (SignumD dst (Binary zero one)));
21349   effect(KILL cr);
21350   format %{ "signumD $dst, $dst" %}
21351   ins_encode %{
21352     int opcode = this->ideal_Opcode();
21353     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21354   %}
21355   ins_pipe( pipe_slow );
21356 %}
21357 
21358 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21359   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21360   match(Set dst (SignumVF src (Binary zero one)));
21361   match(Set dst (SignumVD src (Binary zero one)));
21362   effect(TEMP dst, TEMP xtmp1);
21363   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21364   ins_encode %{
21365     int opcode = this->ideal_Opcode();
21366     int vec_enc = vector_length_encoding(this);
21367     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21368                          $xtmp1$$XMMRegister, vec_enc);
21369   %}
21370   ins_pipe( pipe_slow );
21371 %}
21372 
21373 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21374   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21375   match(Set dst (SignumVF src (Binary zero one)));
21376   match(Set dst (SignumVD src (Binary zero one)));
21377   effect(TEMP dst, TEMP ktmp1);
21378   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21379   ins_encode %{
21380     int opcode = this->ideal_Opcode();
21381     int vec_enc = vector_length_encoding(this);
21382     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21383                           $ktmp1$$KRegister, vec_enc);
21384   %}
21385   ins_pipe( pipe_slow );
21386 %}
21387 
21388 // ---------------------------------------
21389 // For copySign use 0xE4 as the truth-table immediate for vpternlog
21390 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21391 // C (xmm2) is set to 0x7FFFFFFF
21392 // Wherever xmm2 is 0, we want to pick from B (sign)
21393 // Wherever xmm2 is 1, we want to pick from A (src)
21394 //
21395 // A B C Result
21396 // 0 0 0 0
21397 // 0 0 1 0
21398 // 0 1 0 1
21399 // 0 1 1 0
21400 // 1 0 0 0
21401 // 1 0 1 1
21402 // 1 1 0 1
21403 // 1 1 1 1
21404 //
21405 // Result going from high bit to low bit is 0b11100100 = 0xE4
21406 // ---------------------------------------
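// In scalar terms, vpternlog with 0xE4 and that mask computes the blend below:
// $dst contributes the magnitude bits (mask = 1) and $src the sign bit
// (mask = 0). Float case shown; the double rule uses the 64-bit analogues.
//
//   uint32_t copy_sign_bits(uint32_t dst_bits, uint32_t src_bits) {
//     return (dst_bits & 0x7FFFFFFF) | (src_bits & 0x80000000);
//   }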
21407 
21408 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21409   match(Set dst (CopySignF dst src));
21410   effect(TEMP tmp1, TEMP tmp2);
21411   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21412   ins_encode %{
21413     __ movl($tmp2$$Register, 0x7FFFFFFF);
21414     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21415     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21416   %}
21417   ins_pipe( pipe_slow );
21418 %}
21419 
21420 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21421   match(Set dst (CopySignD dst (Binary src zero)));
21422   ins_cost(100);
21423   effect(TEMP tmp1, TEMP tmp2);
21424   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21425   ins_encode %{
21426     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21427     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21428     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21429   %}
21430   ins_pipe( pipe_slow );
21431 %}
21432 
21433 //----------------------------- CompressBits/ExpandBits ------------------------
21434 
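// CompressBits maps to BMI2 pext (gather the src bits selected by the mask into
// contiguous low bits) and ExpandBits to pdep (scatter contiguous low bits of
// src out to the mask positions). A small worked example:
//
//   pext(src = 0b11010110, mask = 0b01010101) = 0b00001110
//   pdep(src = 0b00001110, mask = 0b01010101) = 0b01010100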
21435 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21436   predicate(n->bottom_type()->isa_int());
21437   match(Set dst (CompressBits src mask));
21438   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21439   ins_encode %{
21440     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21441   %}
21442   ins_pipe( pipe_slow );
21443 %}
21444 
21445 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21446   predicate(n->bottom_type()->isa_int());
21447   match(Set dst (ExpandBits src mask));
21448   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21449   ins_encode %{
21450     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21451   %}
21452   ins_pipe( pipe_slow );
21453 %}
21454 
21455 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21456   predicate(n->bottom_type()->isa_int());
21457   match(Set dst (CompressBits src (LoadI mask)));
21458   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21459   ins_encode %{
21460     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21461   %}
21462   ins_pipe( pipe_slow );
21463 %}
21464 
21465 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21466   predicate(n->bottom_type()->isa_int());
21467   match(Set dst (ExpandBits src (LoadI mask)));
21468   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21469   ins_encode %{
21470     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21471   %}
21472   ins_pipe( pipe_slow );
21473 %}
21474 
21475 // --------------------------------- Sqrt --------------------------------------
21476 
21477 instruct vsqrtF_reg(vec dst, vec src) %{
21478   match(Set dst (SqrtVF src));
21479   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21480   ins_encode %{
21481     assert(UseAVX > 0, "required");
21482     int vlen_enc = vector_length_encoding(this);
21483     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21484   %}
21485   ins_pipe( pipe_slow );
21486 %}
21487 
21488 instruct vsqrtF_mem(vec dst, memory mem) %{
21489   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21490   match(Set dst (SqrtVF (LoadVector mem)));
21491   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21492   ins_encode %{
21493     assert(UseAVX > 0, "required");
21494     int vlen_enc = vector_length_encoding(this);
21495     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21496   %}
21497   ins_pipe( pipe_slow );
21498 %}
21499 
21500 // Floating point vector sqrt
21501 instruct vsqrtD_reg(vec dst, vec src) %{
21502   match(Set dst (SqrtVD src));
21503   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21504   ins_encode %{
21505     assert(UseAVX > 0, "required");
21506     int vlen_enc = vector_length_encoding(this);
21507     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21508   %}
21509   ins_pipe( pipe_slow );
21510 %}
21511 
21512 instruct vsqrtD_mem(vec dst, memory mem) %{
21513   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21514   match(Set dst (SqrtVD (LoadVector mem)));
21515   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21516   ins_encode %{
21517     assert(UseAVX > 0, "required");
21518     int vlen_enc = vector_length_encoding(this);
21519     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21520   %}
21521   ins_pipe( pipe_slow );
21522 %}
21523 
21524 // ------------------------------ Shift ---------------------------------------
21525 
21526 // Left and right shift count vectors are the same on x86
21527 // (only lowest bits of xmm reg are used for count).
21528 instruct vshiftcnt(vec dst, rRegI cnt) %{
21529   match(Set dst (LShiftCntV cnt));
21530   match(Set dst (RShiftCntV cnt));
21531   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21532   ins_encode %{
21533     __ movdl($dst$$XMMRegister, $cnt$$Register);
21534   %}
21535   ins_pipe( pipe_slow );
21536 %}
21537 
21538 // Byte vector shift
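// As with byte multiply, there is no packed byte shift: bytes are widened to
// words (vextendbw), shifted in 16-bit lanes, masked back into byte range with
// vector_short_to_byte_mask(), and repacked with packuswb. Per-lane sketch for
// the unsigned-right-shift case (illustrative only):
//
//   uint8_t urshift_byte(uint8_t a, int s) {
//     uint16_t wide = a;                      // zero-extend to 16 bits
//     return (uint8_t)((wide >> s) & 0xFF);   // shift, keep the low byte
//   }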
21539 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21540   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21541   match(Set dst ( LShiftVB src shift));
21542   match(Set dst ( RShiftVB src shift));
21543   match(Set dst (URShiftVB src shift));
21544   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21545   format %{"vector_byte_shift $dst,$src,$shift" %}
21546   ins_encode %{
21547     assert(UseSSE > 3, "required");
21548     int opcode = this->ideal_Opcode();
21549     bool sign = (opcode != Op_URShiftVB);
21550     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21551     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21552     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21553     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21554     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21555   %}
21556   ins_pipe( pipe_slow );
21557 %}
21558 
21559 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21560   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21561             UseAVX <= 1);
21562   match(Set dst ( LShiftVB src shift));
21563   match(Set dst ( RShiftVB src shift));
21564   match(Set dst (URShiftVB src shift));
21565   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21566   format %{"vector_byte_shift $dst,$src,$shift" %}
21567   ins_encode %{
21568     assert(UseSSE > 3, "required");
21569     int opcode = this->ideal_Opcode();
21570     bool sign = (opcode != Op_URShiftVB);
21571     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21572     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21573     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21574     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21575     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21576     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21577     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21578     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21579     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21580   %}
21581   ins_pipe( pipe_slow );
21582 %}
21583 
21584 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21585   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21586             UseAVX > 1);
21587   match(Set dst ( LShiftVB src shift));
21588   match(Set dst ( RShiftVB src shift));
21589   match(Set dst (URShiftVB src shift));
21590   effect(TEMP dst, TEMP tmp);
21591   format %{"vector_byte_shift $dst,$src,$shift" %}
21592   ins_encode %{
21593     int opcode = this->ideal_Opcode();
21594     bool sign = (opcode != Op_URShiftVB);
21595     int vlen_enc = Assembler::AVX_256bit;
21596     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21597     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21598     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21599     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21600     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21601   %}
21602   ins_pipe( pipe_slow );
21603 %}
21604 
21605 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21606   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21607   match(Set dst ( LShiftVB src shift));
21608   match(Set dst ( RShiftVB src shift));
21609   match(Set dst (URShiftVB src shift));
21610   effect(TEMP dst, TEMP tmp);
21611   format %{"vector_byte_shift $dst,$src,$shift" %}
21612   ins_encode %{
21613     assert(UseAVX > 1, "required");
21614     int opcode = this->ideal_Opcode();
21615     bool sign = (opcode != Op_URShiftVB);
21616     int vlen_enc = Assembler::AVX_256bit;
21617     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21618     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21619     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21620     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21621     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21622     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21623     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21624     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21625     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21626   %}
21627   ins_pipe( pipe_slow );
21628 %}
21629 
21630 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21631   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21632   match(Set dst ( LShiftVB src shift));
21633   match(Set dst  (RShiftVB src shift));
21634   match(Set dst (URShiftVB src shift));
21635   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21636   format %{"vector_byte_shift $dst,$src,$shift" %}
21637   ins_encode %{
21638     assert(UseAVX > 2, "required");
21639     int opcode = this->ideal_Opcode();
21640     bool sign = (opcode != Op_URShiftVB);
21641     int vlen_enc = Assembler::AVX_512bit;
21642     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21643     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21644     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21645     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21646     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21647     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21648     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21649     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21650     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21651     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21652     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21653     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21654   %}
21655   ins_pipe( pipe_slow );
21656 %}
21657 
21658 // Short vector logical right shift produces incorrect Java results for
21659 // negative data because Java code converts short values to int with sign
21660 // extension before the shift. But char vectors are fine since chars are
21661 // unsigned values.
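// For example, with short s = -1 and shift 3, Java evaluates (s >>> 3) on the
// sign-extended int 0xFFFFFFFF, giving 0x1FFFFFFF, whose low 16 bits are 0xFFFF
// (-1 again); a packed 16-bit logical shift of the lane 0xFFFF would instead
// produce 0x1FFF (8191). So the logical-right-shift match below is only correct
// for char (unsigned) lanes, per the note above.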
21662 // Shorts/Chars vector left shift
21663 instruct vshiftS(vec dst, vec src, vec shift) %{
21664   predicate(!n->as_ShiftV()->is_var_shift());
21665   match(Set dst ( LShiftVS src shift));
21666   match(Set dst ( RShiftVS src shift));
21667   match(Set dst (URShiftVS src shift));
21668   effect(TEMP dst, USE src, USE shift);
21669   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21670   ins_encode %{
21671     int opcode = this->ideal_Opcode();
21672     if (UseAVX > 0) {
21673       int vlen_enc = vector_length_encoding(this);
21674       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21675     } else {
21676       int vlen = Matcher::vector_length(this);
21677       if (vlen == 2) {
21678         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21679         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21680       } else if (vlen == 4) {
21681         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21682         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21683       } else {
21684         assert (vlen == 8, "sanity");
21685         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21686         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21687       }
21688     }
21689   %}
21690   ins_pipe( pipe_slow );
21691 %}
21692 
21693 // Integers vector left shift
21694 instruct vshiftI(vec dst, vec src, vec shift) %{
21695   predicate(!n->as_ShiftV()->is_var_shift());
21696   match(Set dst ( LShiftVI src shift));
21697   match(Set dst ( RShiftVI src shift));
21698   match(Set dst (URShiftVI src shift));
21699   effect(TEMP dst, USE src, USE shift);
21700   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21701   ins_encode %{
21702     int opcode = this->ideal_Opcode();
21703     if (UseAVX > 0) {
21704       int vlen_enc = vector_length_encoding(this);
21705       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21706     } else {
21707       int vlen = Matcher::vector_length(this);
21708       if (vlen == 2) {
21709         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21710         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21711       } else {
21712         assert(vlen == 4, "sanity");
21713         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21714         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21715       }
21716     }
21717   %}
21718   ins_pipe( pipe_slow );
21719 %}
21720 
21721 // Integers vector left constant shift
21722 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21723   match(Set dst (LShiftVI src (LShiftCntV shift)));
21724   match(Set dst (RShiftVI src (RShiftCntV shift)));
21725   match(Set dst (URShiftVI src (RShiftCntV shift)));
21726   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21727   ins_encode %{
21728     int opcode = this->ideal_Opcode();
21729     if (UseAVX > 0) {
21730       int vector_len = vector_length_encoding(this);
21731       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21732     } else {
21733       int vlen = Matcher::vector_length(this);
21734       if (vlen == 2) {
21735         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21736         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21737       } else {
21738         assert(vlen == 4, "sanity");
21739         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21740         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21741       }
21742     }
21743   %}
21744   ins_pipe( pipe_slow );
21745 %}
21746 
21747 // Longs vector shift
21748 instruct vshiftL(vec dst, vec src, vec shift) %{
21749   predicate(!n->as_ShiftV()->is_var_shift());
21750   match(Set dst ( LShiftVL src shift));
21751   match(Set dst (URShiftVL src shift));
21752   effect(TEMP dst, USE src, USE shift);
21753   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21754   ins_encode %{
21755     int opcode = this->ideal_Opcode();
21756     if (UseAVX > 0) {
21757       int vlen_enc = vector_length_encoding(this);
21758       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21759     } else {
21760       assert(Matcher::vector_length(this) == 2, "");
21761       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21762       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21763     }
21764   %}
21765   ins_pipe( pipe_slow );
21766 %}
21767 
21768 // Longs vector constant shift
21769 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21770   match(Set dst (LShiftVL src (LShiftCntV shift)));
21771   match(Set dst (URShiftVL src (RShiftCntV shift)));
21772   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21773   ins_encode %{
21774     int opcode = this->ideal_Opcode();
21775     if (UseAVX > 0) {
21776       int vector_len = vector_length_encoding(this);
21777       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21778     } else {
21779       assert(Matcher::vector_length(this) == 2, "");
21780       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21781       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21782     }
21783   %}
21784   ins_pipe( pipe_slow );
21785 %}
21786 
21787 // -------------------ArithmeticRightShift -----------------------------------
21788 // Long vector arithmetic right shift
21789 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21790   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21791   match(Set dst (RShiftVL src shift));
21792   effect(TEMP dst, TEMP tmp);
21793   format %{ "vshiftq $dst,$src,$shift" %}
21794   ins_encode %{
21795     uint vlen = Matcher::vector_length(this);
21796     if (vlen == 2) {
21797       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21798       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21799       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21800       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21801       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21802       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21803     } else {
21804       assert(vlen == 4, "sanity");
21805       assert(UseAVX > 1, "required");
21806       int vlen_enc = Assembler::AVX_256bit;
21807       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21808       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21809       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21810       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21811       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21812     }
21813   %}
21814   ins_pipe( pipe_slow );
21815 %}
21816 
21817 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21818   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21819   match(Set dst (RShiftVL src shift));
21820   format %{ "vshiftq $dst,$src,$shift" %}
21821   ins_encode %{
21822     int vlen_enc = vector_length_encoding(this);
21823     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21824   %}
21825   ins_pipe( pipe_slow );
21826 %}
21827 
21828 // ------------------- Variable Shift -----------------------------
21829 // Byte variable shift
21830 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21831   predicate(Matcher::vector_length(n) <= 8 &&
21832             n->as_ShiftV()->is_var_shift() &&
21833             !VM_Version::supports_avx512bw());
21834   match(Set dst ( LShiftVB src shift));
21835   match(Set dst ( RShiftVB src shift));
21836   match(Set dst (URShiftVB src shift));
21837   effect(TEMP dst, TEMP vtmp);
21838   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21839   ins_encode %{
21840     assert(UseAVX >= 2, "required");
21841 
21842     int opcode = this->ideal_Opcode();
21843     int vlen_enc = Assembler::AVX_128bit;
21844     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21845     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21846   %}
21847   ins_pipe( pipe_slow );
21848 %}
21849 
21850 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21851   predicate(Matcher::vector_length(n) == 16 &&
21852             n->as_ShiftV()->is_var_shift() &&
21853             !VM_Version::supports_avx512bw());
21854   match(Set dst ( LShiftVB src shift));
21855   match(Set dst ( RShiftVB src shift));
21856   match(Set dst (URShiftVB src shift));
21857   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21858   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21859   ins_encode %{
21860     assert(UseAVX >= 2, "required");
21861 
21862     int opcode = this->ideal_Opcode();
21863     int vlen_enc = Assembler::AVX_128bit;
21864     // Shift lower half and get word result in dst
21865     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21866 
21867     // Shift upper half and get word result in vtmp1
21868     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21869     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21870     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21871 
21872     // Merge and down convert the two word results to byte in dst
21873     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21874   %}
21875   ins_pipe( pipe_slow );
21876 %}
21877 
21878 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21879   predicate(Matcher::vector_length(n) == 32 &&
21880             n->as_ShiftV()->is_var_shift() &&
21881             !VM_Version::supports_avx512bw());
21882   match(Set dst ( LShiftVB src shift));
21883   match(Set dst ( RShiftVB src shift));
21884   match(Set dst (URShiftVB src shift));
21885   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21886   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21887   ins_encode %{
21888     assert(UseAVX >= 2, "required");
21889 
21890     int opcode = this->ideal_Opcode();
21891     int vlen_enc = Assembler::AVX_128bit;
21892     // Process lower 128 bits and get result in dst
21893     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21894     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21895     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21896     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21897     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21898 
21899     // Process higher 128 bits and get result in vtmp3
21900     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21901     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21902     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21903     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21904     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21905     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21906     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21907 
21908     // Merge the two results in dst
21909     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21910   %}
21911   ins_pipe( pipe_slow );
21912 %}
21913 
21914 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21915   predicate(Matcher::vector_length(n) <= 32 &&
21916             n->as_ShiftV()->is_var_shift() &&
21917             VM_Version::supports_avx512bw());
21918   match(Set dst ( LShiftVB src shift));
21919   match(Set dst ( RShiftVB src shift));
21920   match(Set dst (URShiftVB src shift));
21921   effect(TEMP dst, TEMP vtmp);
21922   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21923   ins_encode %{
21924     assert(UseAVX > 2, "required");
21925 
21926     int opcode = this->ideal_Opcode();
21927     int vlen_enc = vector_length_encoding(this);
21928     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21929   %}
21930   ins_pipe( pipe_slow );
21931 %}
21932 
21933 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21934   predicate(Matcher::vector_length(n) == 64 &&
21935             n->as_ShiftV()->is_var_shift() &&
21936             VM_Version::supports_avx512bw());
21937   match(Set dst ( LShiftVB src shift));
21938   match(Set dst ( RShiftVB src shift));
21939   match(Set dst (URShiftVB src shift));
21940   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21941   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21942   ins_encode %{
21943     assert(UseAVX > 2, "required");
21944 
21945     int opcode = this->ideal_Opcode();
21946     int vlen_enc = Assembler::AVX_256bit;
21947     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21948     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21949     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21950     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21951     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21952   %}
21953   ins_pipe( pipe_slow );
21954 %}
21955 
21956 // Short variable shift
21957 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21958   predicate(Matcher::vector_length(n) <= 8 &&
21959             n->as_ShiftV()->is_var_shift() &&
21960             !VM_Version::supports_avx512bw());
21961   match(Set dst ( LShiftVS src shift));
21962   match(Set dst ( RShiftVS src shift));
21963   match(Set dst (URShiftVS src shift));
21964   effect(TEMP dst, TEMP vtmp);
21965   format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
21966   ins_encode %{
21967     assert(UseAVX >= 2, "required");
21968 
21969     int opcode = this->ideal_Opcode();
21970     bool sign = (opcode != Op_URShiftVS);
21971     int vlen_enc = Assembler::AVX_256bit;
21972     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21973     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21974     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21975     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21976     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21977     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21978   %}
21979   ins_pipe( pipe_slow );
21980 %}
21981 
21982 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21983   predicate(Matcher::vector_length(n) == 16 &&
21984             n->as_ShiftV()->is_var_shift() &&
21985             !VM_Version::supports_avx512bw());
21986   match(Set dst ( LShiftVS src shift));
21987   match(Set dst ( RShiftVS src shift));
21988   match(Set dst (URShiftVS src shift));
21989   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21990   format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
21991   ins_encode %{
21992     assert(UseAVX >= 2, "required");
21993 
21994     int opcode = this->ideal_Opcode();
21995     bool sign = (opcode != Op_URShiftVS);
21996     int vlen_enc = Assembler::AVX_256bit;
21997     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21998     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21999     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22000     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22001     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22002 
22003     // Shift upper half, with result in dst using vtmp1 as TEMP
22004     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
22005     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
22006     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22007     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22008     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22009     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22010 
22011     // Merge lower and upper half result into dst
22012     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22013     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
22014   %}
22015   ins_pipe( pipe_slow );
22016 %}
22017 
22018 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
22019   predicate(n->as_ShiftV()->is_var_shift() &&
22020             VM_Version::supports_avx512bw());
22021   match(Set dst ( LShiftVS src shift));
22022   match(Set dst ( RShiftVS src shift));
22023   match(Set dst (URShiftVS src shift));
22024   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
22025   ins_encode %{
22026     assert(UseAVX > 2, "required");
22027 
22028     int opcode = this->ideal_Opcode();
22029     int vlen_enc = vector_length_encoding(this);
22030     if (!VM_Version::supports_avx512vl()) {
22031       vlen_enc = Assembler::AVX_512bit;
22032     }
22033     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22034   %}
22035   ins_pipe( pipe_slow );
22036 %}
22037 
22038 // Integer variable shift
22039 instruct vshiftI_var(vec dst, vec src, vec shift) %{
22040   predicate(n->as_ShiftV()->is_var_shift());
22041   match(Set dst ( LShiftVI src shift));
22042   match(Set dst ( RShiftVI src shift));
22043   match(Set dst (URShiftVI src shift));
22044   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
22045   ins_encode %{
22046     assert(UseAVX >= 2, "required");
22047 
22048     int opcode = this->ideal_Opcode();
22049     int vlen_enc = vector_length_encoding(this);
22050     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22051   %}
22052   ins_pipe( pipe_slow );
22053 %}
22054 
22055 // Long variable shift
22056 instruct vshiftL_var(vec dst, vec src, vec shift) %{
22057   predicate(n->as_ShiftV()->is_var_shift());
22058   match(Set dst ( LShiftVL src shift));
22059   match(Set dst (URShiftVL src shift));
22060   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22061   ins_encode %{
22062     assert(UseAVX >= 2, "required");
22063 
22064     int opcode = this->ideal_Opcode();
22065     int vlen_enc = vector_length_encoding(this);
22066     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22067   %}
22068   ins_pipe( pipe_slow );
22069 %}
22070 
22071 // Long variable arithmetic right shift
22072 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
22073   predicate(Matcher::vector_length(n) <= 4 &&
22074             n->as_ShiftV()->is_var_shift() &&
22075             UseAVX == 2);
22076   match(Set dst (RShiftVL src shift));
22077   effect(TEMP dst, TEMP vtmp);
22078   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
22079   ins_encode %{
22080     int opcode = this->ideal_Opcode();
22081     int vlen_enc = vector_length_encoding(this);
22082     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
22083                  $vtmp$$XMMRegister);
22084   %}
22085   ins_pipe( pipe_slow );
22086 %}
22087 
22088 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
22089   predicate(n->as_ShiftV()->is_var_shift() &&
22090             UseAVX > 2);
22091   match(Set dst (RShiftVL src shift));
22092   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
22093   ins_encode %{
22094     int opcode = this->ideal_Opcode();
22095     int vlen_enc = vector_length_encoding(this);
22096     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22097   %}
22098   ins_pipe( pipe_slow );
22099 %}
22100 
22101 // --------------------------------- AND --------------------------------------
22102 
22103 instruct vand(vec dst, vec src) %{
22104   predicate(UseAVX == 0);
22105   match(Set dst (AndV dst src));
22106   format %{ "pand    $dst,$src\t! and vectors" %}
22107   ins_encode %{
22108     __ pand($dst$$XMMRegister, $src$$XMMRegister);
22109   %}
22110   ins_pipe( pipe_slow );
22111 %}
22112 
22113 instruct vand_reg(vec dst, vec src1, vec src2) %{
22114   predicate(UseAVX > 0);
22115   match(Set dst (AndV src1 src2));
22116   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
22117   ins_encode %{
22118     int vlen_enc = vector_length_encoding(this);
22119     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22120   %}
22121   ins_pipe( pipe_slow );
22122 %}
22123 
22124 instruct vand_mem(vec dst, vec src, memory mem) %{
22125   predicate((UseAVX > 0) &&
22126             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22127   match(Set dst (AndV src (LoadVector mem)));
22128   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
22129   ins_encode %{
22130     int vlen_enc = vector_length_encoding(this);
22131     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22132   %}
22133   ins_pipe( pipe_slow );
22134 %}
22135 
22136 // --------------------------------- OR ---------------------------------------
22137 
22138 instruct vor(vec dst, vec src) %{
22139   predicate(UseAVX == 0);
22140   match(Set dst (OrV dst src));
22141   format %{ "por     $dst,$src\t! or vectors" %}
22142   ins_encode %{
22143     __ por($dst$$XMMRegister, $src$$XMMRegister);
22144   %}
22145   ins_pipe( pipe_slow );
22146 %}
22147 
22148 instruct vor_reg(vec dst, vec src1, vec src2) %{
22149   predicate(UseAVX > 0);
22150   match(Set dst (OrV src1 src2));
22151   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
22152   ins_encode %{
22153     int vlen_enc = vector_length_encoding(this);
22154     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22155   %}
22156   ins_pipe( pipe_slow );
22157 %}
22158 
22159 instruct vor_mem(vec dst, vec src, memory mem) %{
22160   predicate((UseAVX > 0) &&
22161             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22162   match(Set dst (OrV src (LoadVector mem)));
22163   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
22164   ins_encode %{
22165     int vlen_enc = vector_length_encoding(this);
22166     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22167   %}
22168   ins_pipe( pipe_slow );
22169 %}
22170 
22171 // --------------------------------- XOR --------------------------------------
22172 
22173 instruct vxor(vec dst, vec src) %{
22174   predicate(UseAVX == 0);
22175   match(Set dst (XorV dst src));
22176   format %{ "pxor    $dst,$src\t! xor vectors" %}
22177   ins_encode %{
22178     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22179   %}
22180   ins_pipe( pipe_slow );
22181 %}
22182 
22183 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22184   predicate(UseAVX > 0);
22185   match(Set dst (XorV src1 src2));
22186   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
22187   ins_encode %{
22188     int vlen_enc = vector_length_encoding(this);
22189     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22190   %}
22191   ins_pipe( pipe_slow );
22192 %}
22193 
22194 instruct vxor_mem(vec dst, vec src, memory mem) %{
22195   predicate((UseAVX > 0) &&
22196             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22197   match(Set dst (XorV src (LoadVector mem)));
22198   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
22199   ins_encode %{
22200     int vlen_enc = vector_length_encoding(this);
22201     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22202   %}
22203   ins_pipe( pipe_slow );
22204 %}
22205 
22206 // --------------------------------- VectorCast --------------------------------------
22207 
22208 instruct vcastBtoX(vec dst, vec src) %{
22209   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22210   match(Set dst (VectorCastB2X src));
22211   format %{ "vector_cast_b2x $dst,$src\t!" %}
22212   ins_encode %{
22213     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22214     int vlen_enc = vector_length_encoding(this);
22215     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22216   %}
22217   ins_pipe( pipe_slow );
22218 %}
22219 
22220 instruct vcastBtoD(legVec dst, legVec src) %{
22221   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22222   match(Set dst (VectorCastB2X src));
22223   format %{ "vector_cast_b2x $dst,$src\t!" %}
22224   ins_encode %{
22225     int vlen_enc = vector_length_encoding(this);
22226     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22227   %}
22228   ins_pipe( pipe_slow );
22229 %}
22230 
22231 instruct castStoX(vec dst, vec src) %{
22232   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22233             Matcher::vector_length(n->in(1)) <= 8 && // src
22234             Matcher::vector_element_basic_type(n) == T_BYTE);
22235   match(Set dst (VectorCastS2X src));
22236   format %{ "vector_cast_s2x $dst,$src" %}
22237   ins_encode %{
22238     assert(UseAVX > 0, "required");
22239 
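          // Clear the upper byte of every short so the unsigned saturating pack below acts as a plain truncation.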
22240     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22241     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22242   %}
22243   ins_pipe( pipe_slow );
22244 %}
22245 
22246 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22247   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22248             Matcher::vector_length(n->in(1)) == 16 && // src
22249             Matcher::vector_element_basic_type(n) == T_BYTE);
22250   effect(TEMP dst, TEMP vtmp);
22251   match(Set dst (VectorCastS2X src));
22252   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22253   ins_encode %{
22254     assert(UseAVX > 0, "required");
22255 
22256     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22257     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22258     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22259     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22260   %}
22261   ins_pipe( pipe_slow );
22262 %}
22263 
22264 instruct vcastStoX_evex(vec dst, vec src) %{
22265   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22266             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22267   match(Set dst (VectorCastS2X src));
22268   format %{ "vector_cast_s2x $dst,$src\t!" %}
22269   ins_encode %{
22270     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22271     int src_vlen_enc = vector_length_encoding(this, $src);
22272     int vlen_enc = vector_length_encoding(this);
22273     switch (to_elem_bt) {
22274       case T_BYTE:
22275         if (!VM_Version::supports_avx512vl()) {
22276           vlen_enc = Assembler::AVX_512bit;
22277         }
22278         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22279         break;
22280       case T_INT:
22281         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22282         break;
22283       case T_FLOAT:
22284         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22285         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22286         break;
22287       case T_LONG:
22288         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22289         break;
22290       case T_DOUBLE: {
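              // Sign-extend short->int at half the destination width; the following int->double
              // conversion doubles the element size again, filling the full destination vector.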
22291         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22292         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22293         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22294         break;
22295       }
22296       default:
22297         ShouldNotReachHere();
22298     }
22299   %}
22300   ins_pipe( pipe_slow );
22301 %}
22302 
22303 instruct castItoX(vec dst, vec src) %{
22304   predicate(UseAVX <= 2 &&
22305             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22306             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22307   match(Set dst (VectorCastI2X src));
22308   format %{ "vector_cast_i2x $dst,$src" %}
22309   ins_encode %{
22310     assert(UseAVX > 0, "required");
22311 
22312     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22313     int vlen_enc = vector_length_encoding(this, $src);
22314 
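          // Mask the ints down to the target element's bits first so the unsigned saturating packs
          // below (int->short, and for T_BYTE an additional short->byte) are plain truncations.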
22315     if (to_elem_bt == T_BYTE) {
22316       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22317       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22318       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22319     } else {
22320       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22321       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22322       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22323     }
22324   %}
22325   ins_pipe( pipe_slow );
22326 %}
22327 
22328 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22329   predicate(UseAVX <= 2 &&
22330             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22331             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22332   match(Set dst (VectorCastI2X src));
22333   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22334   effect(TEMP dst, TEMP vtmp);
22335   ins_encode %{
22336     assert(UseAVX > 0, "required");
22337 
22338     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22339     int vlen_enc = vector_length_encoding(this, $src);
22340 
22341     if (to_elem_bt == T_BYTE) {
22342       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22343       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22344       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22345       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22346     } else {
22347       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22348       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22349       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22350       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22351     }
22352   %}
22353   ins_pipe( pipe_slow );
22354 %}
22355 
22356 instruct vcastItoX_evex(vec dst, vec src) %{
22357   predicate(UseAVX > 2 ||
22358             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22359   match(Set dst (VectorCastI2X src));
22360   format %{ "vector_cast_i2x $dst,$src\t!" %}
22361   ins_encode %{
22362     assert(UseAVX > 0, "required");
22363 
22364     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22365     int src_vlen_enc = vector_length_encoding(this, $src);
22366     int dst_vlen_enc = vector_length_encoding(this);
22367     switch (dst_elem_bt) {
22368       case T_BYTE:
22369         if (!VM_Version::supports_avx512vl()) {
22370           src_vlen_enc = Assembler::AVX_512bit;
22371         }
22372         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22373         break;
22374       case T_SHORT:
22375         if (!VM_Version::supports_avx512vl()) {
22376           src_vlen_enc = Assembler::AVX_512bit;
22377         }
22378         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22379         break;
22380       case T_FLOAT:
22381         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22382         break;
22383       case T_LONG:
22384         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22385         break;
22386       case T_DOUBLE:
22387         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22388         break;
22389       default:
22390         ShouldNotReachHere();
22391     }
22392   %}
22393   ins_pipe( pipe_slow );
22394 %}
22395 
22396 instruct vcastLtoBS(vec dst, vec src) %{
22397   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22398             UseAVX <= 2);
22399   match(Set dst (VectorCastL2X src));
22400   format %{ "vector_cast_l2x  $dst,$src" %}
22401   ins_encode %{
22402     assert(UseAVX > 0, "required");
22403 
22404     int vlen = Matcher::vector_length_in_bytes(this, $src);
22405     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22406     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22407                                                       : ExternalAddress(vector_int_to_short_mask());
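          // The shuffles with immediate 8 (0b00'00'10'00) gather the low dword of each qword
          // (vpermpd additionally merges the two 128-bit lanes' results into the low 128 bits);
          // masking and packing then narrow the result to short and, if requested, to byte.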
22408     if (vlen <= 16) {
22409       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22410       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22411       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22412     } else {
22413       assert(vlen <= 32, "required");
22414       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22415       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22416       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22417       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22418     }
22419     if (to_elem_bt == T_BYTE) {
22420       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22421     }
22422   %}
22423   ins_pipe( pipe_slow );
22424 %}
22425 
22426 instruct vcastLtoX_evex(vec dst, vec src) %{
22427   predicate(UseAVX > 2 ||
22428             (Matcher::vector_element_basic_type(n) == T_INT ||
22429              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22430              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22431   match(Set dst (VectorCastL2X src));
22432   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22433   ins_encode %{
22434     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22435     int vlen = Matcher::vector_length_in_bytes(this, $src);
22436     int vlen_enc = vector_length_encoding(this, $src);
22437     switch (to_elem_bt) {
22438       case T_BYTE:
22439         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22440           vlen_enc = Assembler::AVX_512bit;
22441         }
22442         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22443         break;
22444       case T_SHORT:
22445         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22446           vlen_enc = Assembler::AVX_512bit;
22447         }
22448         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22449         break;
22450       case T_INT:
22451         if (vlen == 8) {
22452           if ($dst$$XMMRegister != $src$$XMMRegister) {
22453             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22454           }
22455         } else if (vlen == 16) {
22456           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22457         } else if (vlen == 32) {
22458           if (UseAVX > 2) {
22459             if (!VM_Version::supports_avx512vl()) {
22460               vlen_enc = Assembler::AVX_512bit;
22461             }
22462             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22463           } else {
22464             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22465             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22466           }
22467         } else { // vlen == 64
22468           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22469         }
22470         break;
22471       case T_FLOAT:
22472         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22473         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22474         break;
22475       case T_DOUBLE:
22476         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22477         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22478         break;
22479 
22480       default: assert(false, "%s", type2name(to_elem_bt));
22481     }
22482   %}
22483   ins_pipe( pipe_slow );
22484 %}
22485 
22486 instruct vcastFtoD_reg(vec dst, vec src) %{
22487   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22488   match(Set dst (VectorCastF2X src));
22489   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22490   ins_encode %{
22491     int vlen_enc = vector_length_encoding(this);
22492     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22493   %}
22494   ins_pipe( pipe_slow );
22495 %}
22496 
22497 
22498 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22499   predicate(!VM_Version::supports_avx10_2() &&
22500             !VM_Version::supports_avx512vl() &&
22501             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22502             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22503             is_integral_type(Matcher::vector_element_basic_type(n)));
22504   match(Set dst (VectorCastF2X src));
22505   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22506   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22507   ins_encode %{
22508     int vlen_enc = vector_length_encoding(this, $src);
22509     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22510     // JDK-8292878 removed the need for an explicit scratch register to load addresses more
22511     // than 32 bits away in register-indirect addressing mode, since stub constants are part
22512     // of the code cache and ReservedCodeCacheSize is currently capped at 2G.
22513     // Targets are free to raise this limit, but a code cache larger than 2G looks
22514     // unreasonable in practice; on the flip side, with the given cap we save a temporary
22515     // register allocation, which in the limiting case can prevent spilling in blocks with
22516     // high register pressure.
22517     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22518                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22519                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22520   %}
22521   ins_pipe( pipe_slow );
22522 %}
22523 
22524 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22525   predicate(!VM_Version::supports_avx10_2() &&
22526             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22527             is_integral_type(Matcher::vector_element_basic_type(n)));
22528   match(Set dst (VectorCastF2X src));
22529   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22530   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22531   ins_encode %{
22532     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22533     if (to_elem_bt == T_LONG) {
22534       int vlen_enc = vector_length_encoding(this);
22535       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22536                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22537                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22538     } else {
22539       int vlen_enc = vector_length_encoding(this, $src);
22540       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22541                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22542                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22543     }
22544   %}
22545   ins_pipe( pipe_slow );
22546 %}
22547 
22548 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22549   predicate(VM_Version::supports_avx10_2() &&
22550             is_integral_type(Matcher::vector_element_basic_type(n)));
22551   match(Set dst (VectorCastF2X src));
22552   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22553   ins_encode %{
22554     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
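          // For T_LONG the destination is wider than the float source, so encode from the
          // destination length; for the other integral types encode from the source length.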
22555     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22556     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22557   %}
22558   ins_pipe( pipe_slow );
22559 %}
22560 
22561 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22562   predicate(VM_Version::supports_avx10_2() &&
22563             is_integral_type(Matcher::vector_element_basic_type(n)));
22564   match(Set dst (VectorCastF2X (LoadVector src)));
22565   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22566   ins_encode %{
22567     int vlen = Matcher::vector_length(this);
22568     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22569     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22570     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22571   %}
22572   ins_pipe( pipe_slow );
22573 %}
22574 
22575 instruct vcastDtoF_reg(vec dst, vec src) %{
22576   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22577   match(Set dst (VectorCastD2X src));
22578   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22579   ins_encode %{
22580     int vlen_enc = vector_length_encoding(this, $src);
22581     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22582   %}
22583   ins_pipe( pipe_slow );
22584 %}
22585 
22586 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22587   predicate(!VM_Version::supports_avx10_2() &&
22588             !VM_Version::supports_avx512vl() &&
22589             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22590             is_integral_type(Matcher::vector_element_basic_type(n)));
22591   match(Set dst (VectorCastD2X src));
22592   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22593   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22594   ins_encode %{
22595     int vlen_enc = vector_length_encoding(this, $src);
22596     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22597     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22598                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22599                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22600   %}
22601   ins_pipe( pipe_slow );
22602 %}
22603 
22604 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22605   predicate(!VM_Version::supports_avx10_2() &&
22606             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22607             is_integral_type(Matcher::vector_element_basic_type(n)));
22608   match(Set dst (VectorCastD2X src));
22609   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22610   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22611   ins_encode %{
22612     int vlen_enc = vector_length_encoding(this, $src);
22613     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22614     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22615                               ExternalAddress(vector_float_signflip());
22616     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22617                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22618   %}
22619   ins_pipe( pipe_slow );
22620 %}
22621 
22622 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22623   predicate(VM_Version::supports_avx10_2() &&
22624             is_integral_type(Matcher::vector_element_basic_type(n)));
22625   match(Set dst (VectorCastD2X src));
22626   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22627   ins_encode %{
22628     int vlen_enc = vector_length_encoding(this, $src);
22629     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22630     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22631   %}
22632   ins_pipe( pipe_slow );
22633 %}
22634 
22635 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22636   predicate(VM_Version::supports_avx10_2() &&
22637             is_integral_type(Matcher::vector_element_basic_type(n)));
22638   match(Set dst (VectorCastD2X (LoadVector src)));
22639   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22640   ins_encode %{
22641     int vlen = Matcher::vector_length(this);
22642     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22643     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22644     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22645   %}
22646   ins_pipe( pipe_slow );
22647 %}
22648 
22649 instruct vucast(vec dst, vec src) %{
22650   match(Set dst (VectorUCastB2X src));
22651   match(Set dst (VectorUCastS2X src));
22652   match(Set dst (VectorUCastI2X src));
22653   format %{ "vector_ucast $dst,$src\t!" %}
22654   ins_encode %{
22655     assert(UseAVX > 0, "required");
22656 
22657     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22658     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22659     int vlen_enc = vector_length_encoding(this);
22660     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22661   %}
22662   ins_pipe( pipe_slow );
22663 %}
22664 
22665 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22666   predicate(!VM_Version::supports_avx512vl() &&
22667             Matcher::vector_length_in_bytes(n) < 64 &&
22668             Matcher::vector_element_basic_type(n) == T_INT);
22669   match(Set dst (RoundVF src));
22670   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22671   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22672   ins_encode %{
22673     int vlen_enc = vector_length_encoding(this);
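          // MXCSR value with all exceptions masked and rounding control set to round-down;
          // the EnableX86ECoreOpts variant (0x3FBF) also pre-sets the sticky status flags,
          // assumed here to make MXCSR status updates cheaper on E-cores.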
22674     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22675     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22676                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22677                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22678   %}
22679   ins_pipe( pipe_slow );
22680 %}
22681 
22682 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22683   predicate((VM_Version::supports_avx512vl() ||
22684              Matcher::vector_length_in_bytes(n) == 64) &&
22685              Matcher::vector_element_basic_type(n) == T_INT);
22686   match(Set dst (RoundVF src));
22687   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22688   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22689   ins_encode %{
22690     int vlen_enc = vector_length_encoding(this);
22691     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22692     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22693                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22694                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22695   %}
22696   ins_pipe( pipe_slow );
22697 %}
22698 
22699 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22700   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22701   match(Set dst (RoundVD src));
22702   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22703   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22704   ins_encode %{
22705     int vlen_enc = vector_length_encoding(this);
22706     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22707     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22708                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22709                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22710   %}
22711   ins_pipe( pipe_slow );
22712 %}
22713 
22714 // --------------------------------- VectorMaskCmp --------------------------------------
22715 
22716 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22717   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22718             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22719             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22720             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22721   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22722   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22723   ins_encode %{
22724     int vlen_enc = vector_length_encoding(this, $src1);
22725     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22726     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22727       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22728     } else {
22729       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22730     }
22731   %}
22732   ins_pipe( pipe_slow );
22733 %}
22734 
22735 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22736   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22737             n->bottom_type()->isa_vectmask() == nullptr &&
22738             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22739   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22740   effect(TEMP ktmp);
22741   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22742   ins_encode %{
22743     int vlen_enc = Assembler::AVX_512bit;
22744     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22745     KRegister mask = k0; // The comparison itself is not being masked.
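          // Materialize the boolean vector: the compare result in $ktmp is used as a
          // zero-masking write mask over an all-ones constant, giving -1 or 0 per lane.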
22746     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22747       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22748       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22749     } else {
22750       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22751       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22752     }
22753   %}
22754   ins_pipe( pipe_slow );
22755 %}
22756 
22757 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22758   predicate(n->bottom_type()->isa_vectmask() &&
22759             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22760   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22761   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22762   ins_encode %{
22763     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22764     int vlen_enc = vector_length_encoding(this, $src1);
22765     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22766     KRegister mask = k0; // The comparison itself is not being masked.
22767     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22768       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22769     } else {
22770       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22771     }
22772   %}
22773   ins_pipe( pipe_slow );
22774 %}
22775 
22776 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22777   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22778             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22779             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22780             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22781             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22782             (n->in(2)->get_int() == BoolTest::eq ||
22783              n->in(2)->get_int() == BoolTest::lt ||
22784              n->in(2)->get_int() == BoolTest::gt)); // cond
22785   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22786   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22787   ins_encode %{
22788     int vlen_enc = vector_length_encoding(this, $src1);
22789     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22790     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22791     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22792   %}
22793   ins_pipe( pipe_slow );
22794 %}
22795 
22796 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22797   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22798             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22799             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22800             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22801             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22802             (n->in(2)->get_int() == BoolTest::ne ||
22803              n->in(2)->get_int() == BoolTest::le ||
22804              n->in(2)->get_int() == BoolTest::ge)); // cond
22805   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22806   effect(TEMP dst, TEMP xtmp);
22807   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22808   ins_encode %{
22809     int vlen_enc = vector_length_encoding(this, $src1);
22810     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22811     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22812     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22813   %}
22814   ins_pipe( pipe_slow );
22815 %}
22816 
22817 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22818   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22819             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22820             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22821             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22822             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22823   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22824   effect(TEMP dst, TEMP xtmp);
22825   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22826   ins_encode %{
22827     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22828     int vlen_enc = vector_length_encoding(this, $src1);
22829     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22830     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22831 
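          // No unsigned forms of the packed greater-than compare exist before AVX-512, so flip
          // the sign bit of both operands and reuse the signed compare; the ordering is preserved.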
22832     if (vlen_enc == Assembler::AVX_128bit) {
22833       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22834     } else {
22835       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22836     }
22837     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22838     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22839     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22840   %}
22841   ins_pipe( pipe_slow );
22842 %}
22843 
22844 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22845   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22846              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22847              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22848   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22849   effect(TEMP ktmp);
22850   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22851   ins_encode %{
22852     assert(UseAVX > 2, "required");
22853 
22854     int vlen_enc = vector_length_encoding(this, $src1);
22855     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22856     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22857     KRegister mask = k0; // The comparison itself is not being masked.
22858     bool merge = false;
22859     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22860 
22861     switch (src1_elem_bt) {
22862       case T_INT: {
22863         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22864         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22865         break;
22866       }
22867       case T_LONG: {
22868         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22869         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22870         break;
22871       }
22872       default: assert(false, "%s", type2name(src1_elem_bt));
22873     }
22874   %}
22875   ins_pipe( pipe_slow );
22876 %}
22877 
22878 
22879 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22880   predicate(n->bottom_type()->isa_vectmask() &&
22881             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22882   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22883   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22884   ins_encode %{
22885     assert(UseAVX > 2, "required");
22886     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22887 
22888     int vlen_enc = vector_length_encoding(this, $src1);
22889     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22890     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22891     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22892 
22893     // Compare into the destination mask register, dispatching on the source element type.
22894     switch (src1_elem_bt) {
22895       case T_BYTE: {
22896         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22897         break;
22898       }
22899       case T_SHORT: {
22900         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22901         break;
22902       }
22903       case T_INT: {
22904         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22905         break;
22906       }
22907       case T_LONG: {
22908         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22909         break;
22910       }
22911       default: assert(false, "%s", type2name(src1_elem_bt));
22912     }
22913   %}
22914   ins_pipe( pipe_slow );
22915 %}
22916 
22917 // Extract
22918 
22919 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22920   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22921   match(Set dst (ExtractI src idx));
22922   match(Set dst (ExtractS src idx));
22923   match(Set dst (ExtractB src idx));
22924   format %{ "extractI $dst,$src,$idx\t!" %}
22925   ins_encode %{
22926     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22927 
22928     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22929     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22930   %}
22931   ins_pipe( pipe_slow );
22932 %}
22933 
22934 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22935   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22936             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22937   match(Set dst (ExtractI src idx));
22938   match(Set dst (ExtractS src idx));
22939   match(Set dst (ExtractB src idx));
22940   effect(TEMP vtmp);
22941   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22942   ins_encode %{
22943     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22944 
22945     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22946     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22947     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22948   %}
22949   ins_pipe( pipe_slow );
22950 %}
22951 
22952 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22953   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22954   match(Set dst (ExtractL src idx));
22955   format %{ "extractL $dst,$src,$idx\t!" %}
22956   ins_encode %{
22957     assert(UseSSE >= 4, "required");
22958     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22959 
22960     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22961   %}
22962   ins_pipe( pipe_slow );
22963 %}
22964 
22965 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22966   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22967             Matcher::vector_length(n->in(1)) == 8);  // src
22968   match(Set dst (ExtractL src idx));
22969   effect(TEMP vtmp);
22970   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22971   ins_encode %{
22972     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22973 
22974     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22975     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22976   %}
22977   ins_pipe( pipe_slow );
22978 %}
22979 
22980 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22981   predicate(Matcher::vector_length(n->in(1)) <= 4);
22982   match(Set dst (ExtractF src idx));
22983   effect(TEMP dst, TEMP vtmp);
22984   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22985   ins_encode %{
22986     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22987 
22988     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22989   %}
22990   ins_pipe( pipe_slow );
22991 %}
22992 
22993 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22994   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22995             Matcher::vector_length(n->in(1)/*src*/) == 16);
22996   match(Set dst (ExtractF src idx));
22997   effect(TEMP vtmp);
22998   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22999   ins_encode %{
23000     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23001 
23002     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23003     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
23004   %}
23005   ins_pipe( pipe_slow );
23006 %}
23007 
23008 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
23009   predicate(Matcher::vector_length(n->in(1)) == 2); // src
23010   match(Set dst (ExtractD src idx));
23011   format %{ "extractD $dst,$src,$idx\t!" %}
23012   ins_encode %{
23013     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23014 
23015     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23016   %}
23017   ins_pipe( pipe_slow );
23018 %}
23019 
23020 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
23021   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23022             Matcher::vector_length(n->in(1)) == 8);  // src
23023   match(Set dst (ExtractD src idx));
23024   effect(TEMP vtmp);
23025   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
23026   ins_encode %{
23027     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23028 
23029     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23030     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
23031   %}
23032   ins_pipe( pipe_slow );
23033 %}
23034 
23035 // --------------------------------- Vector Blend --------------------------------------
23036 
23037 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
23038   predicate(UseAVX == 0);
23039   match(Set dst (VectorBlend (Binary dst src) mask));
23040   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
23041   effect(TEMP tmp);
23042   ins_encode %{
23043     assert(UseSSE >= 4, "required");
23044 
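          // SSE4.1 pblendvb reads its mask implicitly from xmm0; $tmp is bound to xmm0, so the
          // mask is copied there first unless it is already in place.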
23045     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
23046       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
23047     }
23048     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
23049   %}
23050   ins_pipe( pipe_slow );
23051 %}
23052 
23053 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
23054   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23055             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23056             Matcher::vector_length_in_bytes(n) <= 32 &&
23057             is_integral_type(Matcher::vector_element_basic_type(n)));
23058   match(Set dst (VectorBlend (Binary src1 src2) mask));
23059   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23060   ins_encode %{
23061     int vlen_enc = vector_length_encoding(this);
23062     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23063   %}
23064   ins_pipe( pipe_slow );
23065 %}
23066 
23067 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
23068   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23069             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23070             Matcher::vector_length_in_bytes(n) <= 32 &&
23071             !is_integral_type(Matcher::vector_element_basic_type(n)));
23072   match(Set dst (VectorBlend (Binary src1 src2) mask));
23073   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23074   ins_encode %{
23075     int vlen_enc = vector_length_encoding(this);
23076     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23077   %}
23078   ins_pipe( pipe_slow );
23079 %}
23080 
23081 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
23082   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
23083             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23084             Matcher::vector_length_in_bytes(n) <= 32);
23085   match(Set dst (VectorBlend (Binary src1 src2) mask));
23086   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
23087   effect(TEMP vtmp, TEMP dst);
23088   ins_encode %{
23089     int vlen_enc = vector_length_encoding(this);
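          // Blend computed as (mask & src2) | (~mask & src1); on E-cores this andn/and/or
          // sequence is used in place of vpblendvb.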
23090     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
23091     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23092     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
23093   %}
23094   ins_pipe( pipe_slow );
23095 %}
23096 
23097 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
23098   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
23099             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
23100   match(Set dst (VectorBlend (Binary src1 src2) mask));
23101   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23102   effect(TEMP ktmp);
23103   ins_encode %{
23104     int vlen_enc = Assembler::AVX_512bit;
23105     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23106     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
23107     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23108   %}
23109   ins_pipe( pipe_slow );
23110 %}
23111 
23112 
23113 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
23114   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
23115             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23116              VM_Version::supports_avx512bw()));
23117   match(Set dst (VectorBlend (Binary src1 src2) mask));
23118   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23119   ins_encode %{
23120     int vlen_enc = vector_length_encoding(this);
23121     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23122     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23123   %}
23124   ins_pipe( pipe_slow );
23125 %}
23126 
23127 // --------------------------------- ABS --------------------------------------
23128 // a = |a|
23129 instruct vabsB_reg(vec dst, vec src) %{
23130   match(Set dst (AbsVB  src));
23131   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23132   ins_encode %{
23133     uint vlen = Matcher::vector_length(this);
23134     if (vlen <= 16) {
23135       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23136     } else {
23137       int vlen_enc = vector_length_encoding(this);
23138       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23139     }
23140   %}
23141   ins_pipe( pipe_slow );
23142 %}
23143 
23144 instruct vabsS_reg(vec dst, vec src) %{
23145   match(Set dst (AbsVS  src));
23146   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23147   ins_encode %{
23148     uint vlen = Matcher::vector_length(this);
23149     if (vlen <= 8) {
23150       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23151     } else {
23152       int vlen_enc = vector_length_encoding(this);
23153       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23154     }
23155   %}
23156   ins_pipe( pipe_slow );
23157 %}
23158 
23159 instruct vabsI_reg(vec dst, vec src) %{
23160   match(Set dst (AbsVI  src));
23161   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23162   ins_encode %{
23163     uint vlen = Matcher::vector_length(this);
23164     if (vlen <= 4) {
23165       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23166     } else {
23167       int vlen_enc = vector_length_encoding(this);
23168       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23169     }
23170   %}
23171   ins_pipe( pipe_slow );
23172 %}
23173 
23174 instruct vabsL_reg(vec dst, vec src) %{
23175   match(Set dst (AbsVL  src));
23176   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23177   ins_encode %{
23178     assert(UseAVX > 2, "required");
23179     int vlen_enc = vector_length_encoding(this);
23180     if (!VM_Version::supports_avx512vl()) {
23181       vlen_enc = Assembler::AVX_512bit;
23182     }
23183     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23184   %}
23185   ins_pipe( pipe_slow );
23186 %}
23187 
23188 // --------------------------------- ABSNEG --------------------------------------
23189 
23190 instruct vabsnegF(vec dst, vec src) %{
23191   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23192   match(Set dst (AbsVF src));
23193   match(Set dst (NegVF src));
23194   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23195   ins_cost(150);
23196   ins_encode %{
23197     int opcode = this->ideal_Opcode();
23198     int vlen = Matcher::vector_length(this);
23199     if (vlen == 2) {
23200       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23201     } else {
23202       assert(vlen == 8 || vlen == 16, "required");
23203       int vlen_enc = vector_length_encoding(this);
23204       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23205     }
23206   %}
23207   ins_pipe( pipe_slow );
23208 %}
23209 
23210 instruct vabsneg4F(vec dst) %{
23211   predicate(Matcher::vector_length(n) == 4);
23212   match(Set dst (AbsVF dst));
23213   match(Set dst (NegVF dst));
23214   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23215   ins_cost(150);
23216   ins_encode %{
23217     int opcode = this->ideal_Opcode();
23218     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23219   %}
23220   ins_pipe( pipe_slow );
23221 %}
23222 
23223 instruct vabsnegD(vec dst, vec src) %{
23224   match(Set dst (AbsVD  src));
23225   match(Set dst (NegVD  src));
23226   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23227   ins_encode %{
23228     int opcode = this->ideal_Opcode();
23229     uint vlen = Matcher::vector_length(this);
23230     if (vlen == 2) {
23231       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23232     } else {
23233       int vlen_enc = vector_length_encoding(this);
23234       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23235     }
23236   %}
23237   ins_pipe( pipe_slow );
23238 %}
23239 
23240 //------------------------------------- VectorTest --------------------------------------------
23241 
23242 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23243   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23244   match(Set cr (VectorTest src1 src2));
23245   effect(TEMP vtmp);
23246   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
23247   ins_encode %{
23248     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23249     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23250     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23251   %}
23252   ins_pipe( pipe_slow );
23253 %}
23254 
23255 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23256   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23257   match(Set cr (VectorTest src1 src2));
23258   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23259   ins_encode %{
23260     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23261     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23262     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23263   %}
23264   ins_pipe( pipe_slow );
23265 %}
23266 
23267 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23268   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23269              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23270             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23271   match(Set cr (VectorTest src1 src2));
23272   effect(TEMP tmp);
23273   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23274   ins_encode %{
23275     uint masklen = Matcher::vector_length(this, $src1);
23276     __ kmovwl($tmp$$Register, $src1$$KRegister);
23277     __ andl($tmp$$Register, (1 << masklen) - 1);
23278     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23279   %}
23280   ins_pipe( pipe_slow );
23281 %}
23282 
23283 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23284   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23285              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23286             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23287   match(Set cr (VectorTest src1 src2));
23288   effect(TEMP tmp);
23289   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23290   ins_encode %{
23291     uint masklen = Matcher::vector_length(this, $src1);
23292     __ kmovwl($tmp$$Register, $src1$$KRegister);
23293     __ andl($tmp$$Register, (1 << masklen) - 1);
23294   %}
23295   ins_pipe( pipe_slow );
23296 %}
23297 
23298 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23299   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23300             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23301   match(Set cr (VectorTest src1 src2));
23302   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23303   ins_encode %{
23304     uint masklen = Matcher::vector_length(this, $src1);
23305     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23306   %}
23307   ins_pipe( pipe_slow );
23308 %}
23309 
23310 //------------------------------------- LoadMask --------------------------------------------
23311 
23312 instruct loadMask(legVec dst, legVec src) %{
23313   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23314   match(Set dst (VectorLoadMask src));
23315   effect(TEMP dst);
23316   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23317   ins_encode %{
23318     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23319     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23320     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23321   %}
23322   ins_pipe( pipe_slow );
23323 %}
23324 
23325 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23326   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23327   match(Set dst (VectorLoadMask src));
23328   effect(TEMP xtmp);
23329   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23330   ins_encode %{
23331     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23332                         true, Assembler::AVX_512bit);
23333   %}
23334   ins_pipe( pipe_slow );
23335 %}
23336 
23337 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
23338   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23339   match(Set dst (VectorLoadMask src));
23340   effect(TEMP xtmp);
23341   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23342   ins_encode %{
23343     int vlen_enc = vector_length_encoding(in(1));
23344     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23345                         false, vlen_enc);
23346   %}
23347   ins_pipe( pipe_slow );
23348 %}
23349 
23350 //------------------------------------- StoreMask --------------------------------------------
23351 
23352 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23353   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23354   match(Set dst (VectorStoreMask src size));
23355   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23356   ins_encode %{
23357     int vlen = Matcher::vector_length(this);
23358     if (vlen <= 16 && UseAVX <= 2) {
23359       assert(UseSSE >= 3, "required");
23360       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23361     } else {
23362       assert(UseAVX > 0, "required");
23363       int src_vlen_enc = vector_length_encoding(this, $src);
23364       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23365     }
23366   %}
23367   ins_pipe( pipe_slow );
23368 %}
23369 
23370 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23371   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23372   match(Set dst (VectorStoreMask src size));
23373   effect(TEMP_DEF dst, TEMP xtmp);
23374   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23375   ins_encode %{
23376     int vlen_enc = Assembler::AVX_128bit;
23377     int vlen = Matcher::vector_length(this);
23378     if (vlen <= 8) {
23379       assert(UseSSE >= 3, "required");
23380       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23381       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23382       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23383     } else {
23384       assert(UseAVX > 0, "required");
23385       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23386       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23387       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23388     }
23389   %}
23390   ins_pipe( pipe_slow );
23391 %}
23392 
23393 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23394   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23395   match(Set dst (VectorStoreMask src size));
23396   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23397   effect(TEMP_DEF dst, TEMP xtmp);
23398   ins_encode %{
23399     int vlen_enc = Assembler::AVX_128bit;
23400     int vlen = Matcher::vector_length(this);
23401     if (vlen <= 4) {
23402       assert(UseSSE >= 3, "required");
23403       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23404       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23405       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23406       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23407     } else {
23408       assert(UseAVX > 0, "required");
23409       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23410       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23411       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23412       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23413       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23414     }
23415   %}
23416   ins_pipe( pipe_slow );
23417 %}
23418 
23419 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23420   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23421   match(Set dst (VectorStoreMask src size));
23422   effect(TEMP_DEF dst, TEMP xtmp);
23423   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23424   ins_encode %{
23425     assert(UseSSE >= 3, "required");
23426     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23427     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23428     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23429     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23430     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23431   %}
23432   ins_pipe( pipe_slow );
23433 %}
23434 
23435 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23436   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23437   match(Set dst (VectorStoreMask src size));
23438   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23439   effect(TEMP_DEF dst, TEMP vtmp);
23440   ins_encode %{
23441     int vlen_enc = Assembler::AVX_128bit;
23442     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23443     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23444     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23445     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23446     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23447     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23448     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23449   %}
23450   ins_pipe( pipe_slow );
23451 %}
23452 
23453 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23454   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23455   match(Set dst (VectorStoreMask src size));
23456   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23457   ins_encode %{
23458     int src_vlen_enc = vector_length_encoding(this, $src);
23459     int dst_vlen_enc = vector_length_encoding(this);
23460     if (!VM_Version::supports_avx512vl()) {
23461       src_vlen_enc = Assembler::AVX_512bit;
23462     }
23463     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23464     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23465   %}
23466   ins_pipe( pipe_slow );
23467 %}
23468 
23469 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23470   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23471   match(Set dst (VectorStoreMask src size));
23472   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23473   ins_encode %{
23474     int src_vlen_enc = vector_length_encoding(this, $src);
23475     int dst_vlen_enc = vector_length_encoding(this);
23476     if (!VM_Version::supports_avx512vl()) {
23477       src_vlen_enc = Assembler::AVX_512bit;
23478     }
23479     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23480     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23481   %}
23482   ins_pipe( pipe_slow );
23483 %}
23484 
23485 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23486   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23487   match(Set dst (VectorStoreMask mask size));
23488   effect(TEMP_DEF dst);
23489   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23490   ins_encode %{
23491     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23492     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23493                  false, Assembler::AVX_512bit, noreg);
23494     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23495   %}
23496   ins_pipe( pipe_slow );
23497 %}
23498 
23499 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23500   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23501   match(Set dst (VectorStoreMask mask size));
23502   effect(TEMP_DEF dst);
23503   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23504   ins_encode %{
23505     int dst_vlen_enc = vector_length_encoding(this);
23506     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23507     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23508   %}
23509   ins_pipe( pipe_slow );
23510 %}
23511 
23512 instruct vmaskcast_evex(kReg dst) %{
23513   match(Set dst (VectorMaskCast dst));
23514   ins_cost(0);
23515   format %{ "vector_mask_cast $dst" %}
23516   ins_encode %{
23517     // empty
23518   %}
23519   ins_pipe(empty);
23520 %}
23521 
23522 instruct vmaskcast(vec dst) %{
23523   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23524   match(Set dst (VectorMaskCast dst));
23525   ins_cost(0);
23526   format %{ "vector_mask_cast $dst" %}
23527   ins_encode %{
23528     // empty
23529   %}
23530   ins_pipe(empty);
23531 %}
23532 
23533 instruct vmaskcast_avx(vec dst, vec src) %{
23534   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23535   match(Set dst (VectorMaskCast src));
23536   format %{ "vector_mask_cast $dst, $src" %}
23537   ins_encode %{
23538     int vlen = Matcher::vector_length(this);
23539     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23540     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23541     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23542   %}
23543   ins_pipe(pipe_slow);
23544 %}
23545 
23546 //-------------------------------- Load Iota Indices ----------------------------------
23547 
23548 instruct loadIotaIndices(vec dst, immI_0 src) %{
23549   match(Set dst (VectorLoadConst src));
23550   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23551   ins_encode %{
23552      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23553      BasicType bt = Matcher::vector_element_basic_type(this);
23554      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23555   %}
23556   ins_pipe( pipe_slow );
23557 %}
23558 
23559 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23560   match(Set dst (PopulateIndex src1 src2));
23561   effect(TEMP dst, TEMP vtmp);
23562   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23563   ins_encode %{
23564      assert($src2$$constant == 1, "required");
23565      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23566      int vlen_enc = vector_length_encoding(this);
23567      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23568      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23569      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23570      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23571   %}
23572   ins_pipe( pipe_slow );
23573 %}
23574 
23575 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23576   match(Set dst (PopulateIndex src1 src2));
23577   effect(TEMP dst, TEMP vtmp);
23578   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23579   ins_encode %{
23580      assert($src2$$constant == 1, "required");
23581      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23582      int vlen_enc = vector_length_encoding(this);
23583      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23584      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23585      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23586      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23587   %}
23588   ins_pipe( pipe_slow );
23589 %}
23590 
23591 //-------------------------------- Rearrange ----------------------------------
23592 
23593 // LoadShuffle/Rearrange for Byte
23594 instruct rearrangeB(vec dst, vec shuffle) %{
23595   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23596             Matcher::vector_length(n) < 32);
23597   match(Set dst (VectorRearrange dst shuffle));
23598   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23599   ins_encode %{
23600     assert(UseSSE >= 4, "required");
23601     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23602   %}
23603   ins_pipe( pipe_slow );
23604 %}
23605 
23606 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23607   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23608             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23609   match(Set dst (VectorRearrange src shuffle));
23610   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23611   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23612   ins_encode %{
23613     assert(UseAVX >= 2, "required");
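    // vpshufb can only permute bytes within each 128-bit lane, so the full 256-bit
    // rearrange is emulated: shuffle both the original source and a lane-swapped
    // copy, then blend the two results depending on whether each shuffle index
    // refers to the current lane or to the other one.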
23614     // Swap src into vtmp1
23615     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23616     // Shuffle swapped src to get entries from other 128 bit lane
23617     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23618     // Shuffle original src to get entries from self 128 bit lane
23619     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23620     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23621     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23622     // Perform the blend
23623     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23624   %}
23625   ins_pipe( pipe_slow );
23626 %}
23627 
23628 
23629 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23630   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23631             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23632   match(Set dst (VectorRearrange src shuffle));
23633   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23635   ins_encode %{
23636     int vlen_enc = vector_length_encoding(this);
23637     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23638                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23639                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23640   %}
23641   ins_pipe( pipe_slow );
23642 %}
23643 
23644 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23645   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23646             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23647   match(Set dst (VectorRearrange src shuffle));
23648   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23649   ins_encode %{
23650     int vlen_enc = vector_length_encoding(this);
23651     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23652   %}
23653   ins_pipe( pipe_slow );
23654 %}
23655 
23656 // LoadShuffle/Rearrange for Short
23657 
23658 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23659   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23660             !VM_Version::supports_avx512bw());
23661   match(Set dst (VectorLoadShuffle src));
23662   effect(TEMP dst, TEMP vtmp);
23663   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23664   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
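    // In effect each short index s is turned into the byte pair {2*s, 2*s+1},
    // e.g. a shuffle index of 3 selects source bytes 6 and 7.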
23667     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23668     if (UseAVX == 0) {
23669       assert(vlen_in_bytes <= 16, "required");
23670       // Multiply each shuffle by two to get byte index
23671       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23672       __ psllw($vtmp$$XMMRegister, 1);
23673 
23674       // Duplicate to create 2 copies of byte index
23675       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23676       __ psllw($dst$$XMMRegister, 8);
23677       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23678 
23679       // Add one to get alternate byte index
23680       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23681       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23682     } else {
23683       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23684       int vlen_enc = vector_length_encoding(this);
23685       // Multiply each shuffle by two to get byte index
23686       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23687 
23688       // Duplicate to create 2 copies of byte index
23689       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23690       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23691 
23692       // Add one to get alternate byte index
23693       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23694     }
23695   %}
23696   ins_pipe( pipe_slow );
23697 %}
23698 
23699 instruct rearrangeS(vec dst, vec shuffle) %{
23700   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23701             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23702   match(Set dst (VectorRearrange dst shuffle));
23703   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23704   ins_encode %{
23705     assert(UseSSE >= 4, "required");
23706     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23707   %}
23708   ins_pipe( pipe_slow );
23709 %}
23710 
23711 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23712   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23713             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23714   match(Set dst (VectorRearrange src shuffle));
23715   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23716   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23717   ins_encode %{
23718     assert(UseAVX >= 2, "required");
23719     // Swap src into vtmp1
23720     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23721     // Shuffle swapped src to get entries from other 128 bit lane
23722     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23723     // Shuffle original src to get entries from self 128 bit lane
23724     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23725     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23726     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23727     // Perform the blend
23728     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23729   %}
23730   ins_pipe( pipe_slow );
23731 %}
23732 
23733 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23734   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23735             VM_Version::supports_avx512bw());
23736   match(Set dst (VectorRearrange src shuffle));
23737   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23738   ins_encode %{
23739     int vlen_enc = vector_length_encoding(this);
23740     if (!VM_Version::supports_avx512vl()) {
23741       vlen_enc = Assembler::AVX_512bit;
23742     }
23743     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23744   %}
23745   ins_pipe( pipe_slow );
23746 %}
23747 
23748 // LoadShuffle/Rearrange for Integer and Float
23749 
23750 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23751   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23752             Matcher::vector_length(n) == 4 && UseAVX == 0);
23753   match(Set dst (VectorLoadShuffle src));
23754   effect(TEMP dst, TEMP vtmp);
23755   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23756   ins_encode %{
23757     assert(UseSSE >= 4, "required");
23758 
    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
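    // In effect each int index s is turned into the byte quadruple
    // {4*s, 4*s+1, 4*s+2, 4*s+3}, e.g. a shuffle index of 2 selects
    // source bytes 8 through 11.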
23761 
23762     // Duplicate and multiply each shuffle by 4
23763     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23764     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23765     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23766     __ psllw($vtmp$$XMMRegister, 2);
23767 
23768     // Duplicate again to create 4 copies of byte index
23769     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23770     __ psllw($dst$$XMMRegister, 8);
23771     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23772 
23773     // Add 3,2,1,0 to get alternate byte index
23774     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23775     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23776   %}
23777   ins_pipe( pipe_slow );
23778 %}
23779 
23780 instruct rearrangeI(vec dst, vec shuffle) %{
23781   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23782             UseAVX == 0);
23783   match(Set dst (VectorRearrange dst shuffle));
23784   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23785   ins_encode %{
23786     assert(UseSSE >= 4, "required");
23787     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23788   %}
23789   ins_pipe( pipe_slow );
23790 %}
23791 
23792 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23793   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23794             UseAVX > 0);
23795   match(Set dst (VectorRearrange src shuffle));
23796   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23797   ins_encode %{
23798     int vlen_enc = vector_length_encoding(this);
23799     BasicType bt = Matcher::vector_element_basic_type(this);
23800     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23801   %}
23802   ins_pipe( pipe_slow );
23803 %}
23804 
23805 // LoadShuffle/Rearrange for Long and Double
23806 
23807 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23808   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23809             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23810   match(Set dst (VectorLoadShuffle src));
23811   effect(TEMP dst, TEMP vtmp);
23812   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23813   ins_encode %{
23814     assert(UseAVX >= 2, "required");
23815 
23816     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.
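    // In effect each long index q is turned into the double word pair {2*q, 2*q+1},
    // e.g. a shuffle index of 3 selects double words 6 and 7.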
23819 
23820     // Multiply each shuffle by two to get double word index
23821     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23822 
23823     // Duplicate each double word shuffle
23824     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23825     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23826 
23827     // Add one to get alternate double word index
23828     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23829   %}
23830   ins_pipe( pipe_slow );
23831 %}
23832 
23833 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23834   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23835             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23836   match(Set dst (VectorRearrange src shuffle));
23837   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23838   ins_encode %{
23839     assert(UseAVX >= 2, "required");
23840 
23841     int vlen_enc = vector_length_encoding(this);
23842     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23843   %}
23844   ins_pipe( pipe_slow );
23845 %}
23846 
23847 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23848   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23849             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23850   match(Set dst (VectorRearrange src shuffle));
23851   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23852   ins_encode %{
23853     assert(UseAVX > 2, "required");
23854 
23855     int vlen_enc = vector_length_encoding(this);
23856     if (vlen_enc == Assembler::AVX_128bit) {
23857       vlen_enc = Assembler::AVX_256bit;
23858     }
23859     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23860   %}
23861   ins_pipe( pipe_slow );
23862 %}
23863 
23864 // --------------------------------- FMA --------------------------------------
23865 // a * b + c
23866 
23867 instruct vfmaF_reg(vec a, vec b, vec c) %{
23868   match(Set c (FmaVF  c (Binary a b)));
23869   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23870   ins_cost(150);
23871   ins_encode %{
23872     assert(UseFMA, "not enabled");
23873     int vlen_enc = vector_length_encoding(this);
23874     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23875   %}
23876   ins_pipe( pipe_slow );
23877 %}
23878 
23879 instruct vfmaF_mem(vec a, memory b, vec c) %{
23880   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23881   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23882   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23883   ins_cost(150);
23884   ins_encode %{
23885     assert(UseFMA, "not enabled");
23886     int vlen_enc = vector_length_encoding(this);
23887     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23888   %}
23889   ins_pipe( pipe_slow );
23890 %}
23891 
23892 instruct vfmaD_reg(vec a, vec b, vec c) %{
23893   match(Set c (FmaVD  c (Binary a b)));
23894   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23895   ins_cost(150);
23896   ins_encode %{
23897     assert(UseFMA, "not enabled");
23898     int vlen_enc = vector_length_encoding(this);
23899     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23900   %}
23901   ins_pipe( pipe_slow );
23902 %}
23903 
23904 instruct vfmaD_mem(vec a, memory b, vec c) %{
23905   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23906   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23907   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23908   ins_cost(150);
23909   ins_encode %{
23910     assert(UseFMA, "not enabled");
23911     int vlen_enc = vector_length_encoding(this);
23912     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23913   %}
23914   ins_pipe( pipe_slow );
23915 %}
23916 
23917 // --------------------------------- Vector Multiply Add --------------------------------------
23918 
23919 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23920   predicate(UseAVX == 0);
23921   match(Set dst (MulAddVS2VI dst src1));
23922   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23923   ins_encode %{
23924     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23925   %}
23926   ins_pipe( pipe_slow );
23927 %}
23928 
23929 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23930   predicate(UseAVX > 0);
23931   match(Set dst (MulAddVS2VI src1 src2));
23932   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23933   ins_encode %{
23934     int vlen_enc = vector_length_encoding(this);
23935     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23936   %}
23937   ins_pipe( pipe_slow );
23938 %}
23939 
23940 // --------------------------------- Vector Multiply Add Add ----------------------------------
23941 
23942 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23943   predicate(VM_Version::supports_avx512_vnni());
23944   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23945   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23946   ins_encode %{
23947     assert(UseAVX > 2, "required");
23948     int vlen_enc = vector_length_encoding(this);
23949     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23950   %}
23951   ins_pipe( pipe_slow );
23952   ins_cost(10);
23953 %}
23954 
23955 // --------------------------------- PopCount --------------------------------------
23956 
23957 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23958   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23959   match(Set dst (PopCountVI src));
23960   match(Set dst (PopCountVL src));
23961   format %{ "vector_popcount_integral $dst, $src" %}
23962   ins_encode %{
23963     int opcode = this->ideal_Opcode();
23964     int vlen_enc = vector_length_encoding(this, $src);
23965     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23966     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23967   %}
23968   ins_pipe( pipe_slow );
23969 %}
23970 
23971 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23972   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23973   match(Set dst (PopCountVI src mask));
23974   match(Set dst (PopCountVL src mask));
23975   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23976   ins_encode %{
23977     int vlen_enc = vector_length_encoding(this, $src);
23978     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23979     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23980     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23981   %}
23982   ins_pipe( pipe_slow );
23983 %}
23984 
23985 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23986   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23987   match(Set dst (PopCountVI src));
23988   match(Set dst (PopCountVL src));
23989   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23990   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23991   ins_encode %{
23992     int opcode = this->ideal_Opcode();
23993     int vlen_enc = vector_length_encoding(this, $src);
23994     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23995     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23996                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23997   %}
23998   ins_pipe( pipe_slow );
23999 %}
24000 
24001 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24002 
24003 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
24004   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24005                                               Matcher::vector_length_in_bytes(n->in(1))));
24006   match(Set dst (CountTrailingZerosV src));
24007   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
24008   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
24010   ins_encode %{
24011     int vlen_enc = vector_length_encoding(this, $src);
24012     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24013     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24014                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24015   %}
24016   ins_pipe( pipe_slow );
24017 %}
24018 
24019 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24020   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24021             VM_Version::supports_avx512cd() &&
24022             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24023   match(Set dst (CountTrailingZerosV src));
24024   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24025   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24027   ins_encode %{
24028     int vlen_enc = vector_length_encoding(this, $src);
24029     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24030     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24031                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24032   %}
24033   ins_pipe( pipe_slow );
24034 %}
24035 
24036 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
24037   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24038   match(Set dst (CountTrailingZerosV src));
24039   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
24040   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
24042   ins_encode %{
24043     int vlen_enc = vector_length_encoding(this, $src);
24044     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24045     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24046                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
24047                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
24048   %}
24049   ins_pipe( pipe_slow );
24050 %}
24051 
24052 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24053   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24054   match(Set dst (CountTrailingZerosV src));
24055   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24056   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24057   ins_encode %{
24058     int vlen_enc = vector_length_encoding(this, $src);
24059     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24060     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24061                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24062   %}
24063   ins_pipe( pipe_slow );
24064 %}
24065 
24066 
24067 // --------------------------------- Bitwise Ternary Logic ----------------------------------
24068 
24069 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
24070   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
24071   effect(TEMP dst);
24072   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24073   ins_encode %{
24074     int vector_len = vector_length_encoding(this);
24075     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
24076   %}
24077   ins_pipe( pipe_slow );
24078 %}
24079 
24080 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
24081   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
24082   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
24083   effect(TEMP dst);
24084   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24085   ins_encode %{
24086     int vector_len = vector_length_encoding(this);
24087     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
24088   %}
24089   ins_pipe( pipe_slow );
24090 %}
24091 
24092 // --------------------------------- Rotation Operations ----------------------------------
24093 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
24094   match(Set dst (RotateLeftV src shift));
24095   match(Set dst (RotateRightV src shift));
24096   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
24097   ins_encode %{
24098     int opcode      = this->ideal_Opcode();
24099     int vector_len  = vector_length_encoding(this);
24100     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24101     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
24102   %}
24103   ins_pipe( pipe_slow );
24104 %}
24105 
24106 instruct vprorate(vec dst, vec src, vec shift) %{
24107   match(Set dst (RotateLeftV src shift));
24108   match(Set dst (RotateRightV src shift));
24109   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
24110   ins_encode %{
24111     int opcode      = this->ideal_Opcode();
24112     int vector_len  = vector_length_encoding(this);
24113     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24114     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
24115   %}
24116   ins_pipe( pipe_slow );
24117 %}
24118 
24119 // ---------------------------------- Masked Operations ------------------------------------
24120 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24121   predicate(!n->in(3)->bottom_type()->isa_vectmask());
24122   match(Set dst (LoadVectorMasked mem mask));
24123   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24124   ins_encode %{
24125     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24126     int vlen_enc = vector_length_encoding(this);
24127     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24128   %}
24129   ins_pipe( pipe_slow );
24130 %}
24131 
24132 
24133 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24134   predicate(n->in(3)->bottom_type()->isa_vectmask());
24135   match(Set dst (LoadVectorMasked mem mask));
24136   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24137   ins_encode %{
24138     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
24139     int vector_len = vector_length_encoding(this);
24140     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24141   %}
24142   ins_pipe( pipe_slow );
24143 %}
24144 
24145 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24146   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24147   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24148   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24149   ins_encode %{
24150     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24151     int vlen_enc = vector_length_encoding(src_node);
24152     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24153     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24154   %}
24155   ins_pipe( pipe_slow );
24156 %}
24157 
24158 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24159   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24160   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24161   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24162   ins_encode %{
24163     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24164     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24165     int vlen_enc = vector_length_encoding(src_node);
24166     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24167   %}
24168   ins_pipe( pipe_slow );
24169 %}
24170 
24171 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24172   match(Set addr (VerifyVectorAlignment addr mask));
24173   effect(KILL cr);
24174   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24175   ins_encode %{
24176     Label Lskip;
24177     // check if masked bits of addr are zero
24178     __ testq($addr$$Register, $mask$$constant);
24179     __ jccb(Assembler::equal, Lskip);
24180     __ stop("verify_vector_alignment found a misaligned vector memory access");
24181     __ bind(Lskip);
24182   %}
24183   ins_pipe(pipe_slow);
24184 %}
24185 
24186 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24187   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24188   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24189   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24190   ins_encode %{
24191     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24192     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24193 
24194     Label DONE;
24195     int vlen_enc = vector_length_encoding(this, $src1);
24196     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24197 
24198     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24199     __ mov64($dst$$Register, -1L);
24200     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24201     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24202     __ jccb(Assembler::carrySet, DONE);
24203     __ kmovql($dst$$Register, $ktmp1$$KRegister);
24204     __ notq($dst$$Register);
24205     __ tzcntq($dst$$Register, $dst$$Register);
24206     __ bind(DONE);
24207   %}
24208   ins_pipe( pipe_slow );
24209 %}
24210 
24211 
24212 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24213   match(Set dst (VectorMaskGen len));
24214   effect(TEMP temp, KILL cr);
24215   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24216   ins_encode %{
24217     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24218   %}
24219   ins_pipe( pipe_slow );
24220 %}
24221 
24222 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24223   match(Set dst (VectorMaskGen len));
24224   format %{ "vector_mask_gen $len \t! vector mask generator" %}
24225   effect(TEMP temp);
24226   ins_encode %{
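    // Shifting the all-ones pattern right by (64 - len) leaves exactly the low len
    // bits set, e.g. len == 5 yields 0x1F.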
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
24228     __ kmovql($dst$$KRegister, $temp$$Register);
24229   %}
24230   ins_pipe( pipe_slow );
24231 %}
24232 
24233 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24234   predicate(n->in(1)->bottom_type()->isa_vectmask());
24235   match(Set dst (VectorMaskToLong mask));
24236   effect(TEMP dst, KILL cr);
24237   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24238   ins_encode %{
24239     int opcode = this->ideal_Opcode();
24240     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24241     int mask_len = Matcher::vector_length(this, $mask);
24242     int mask_size = mask_len * type2aelembytes(mbt);
24243     int vlen_enc = vector_length_encoding(this, $mask);
24244     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24245                              $dst$$Register, mask_len, mask_size, vlen_enc);
24246   %}
24247   ins_pipe( pipe_slow );
24248 %}
24249 
24250 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24251   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24252   match(Set dst (VectorMaskToLong mask));
24253   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24254   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24255   ins_encode %{
24256     int opcode = this->ideal_Opcode();
24257     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24258     int mask_len = Matcher::vector_length(this, $mask);
24259     int vlen_enc = vector_length_encoding(this, $mask);
24260     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24261                              $dst$$Register, mask_len, mbt, vlen_enc);
24262   %}
24263   ins_pipe( pipe_slow );
24264 %}
24265 
24266 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24267   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24268   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24269   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24270   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24271   ins_encode %{
24272     int opcode = this->ideal_Opcode();
24273     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24274     int mask_len = Matcher::vector_length(this, $mask);
24275     int vlen_enc = vector_length_encoding(this, $mask);
24276     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24277                              $dst$$Register, mask_len, mbt, vlen_enc);
24278   %}
24279   ins_pipe( pipe_slow );
24280 %}
24281 
24282 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24283   predicate(n->in(1)->bottom_type()->isa_vectmask());
24284   match(Set dst (VectorMaskTrueCount mask));
24285   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24286   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24287   ins_encode %{
24288     int opcode = this->ideal_Opcode();
24289     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24290     int mask_len = Matcher::vector_length(this, $mask);
24291     int mask_size = mask_len * type2aelembytes(mbt);
24292     int vlen_enc = vector_length_encoding(this, $mask);
24293     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24294                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24295   %}
24296   ins_pipe( pipe_slow );
24297 %}
24298 
24299 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24300   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24301   match(Set dst (VectorMaskTrueCount mask));
24302   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24303   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24304   ins_encode %{
24305     int opcode = this->ideal_Opcode();
24306     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24307     int mask_len = Matcher::vector_length(this, $mask);
24308     int vlen_enc = vector_length_encoding(this, $mask);
24309     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24310                              $tmp$$Register, mask_len, mbt, vlen_enc);
24311   %}
24312   ins_pipe( pipe_slow );
24313 %}
24314 
24315 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24316   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24317   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24318   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24319   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24320   ins_encode %{
24321     int opcode = this->ideal_Opcode();
24322     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24323     int mask_len = Matcher::vector_length(this, $mask);
24324     int vlen_enc = vector_length_encoding(this, $mask);
24325     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24326                              $tmp$$Register, mask_len, mbt, vlen_enc);
24327   %}
24328   ins_pipe( pipe_slow );
24329 %}
24330 
24331 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24332   predicate(n->in(1)->bottom_type()->isa_vectmask());
24333   match(Set dst (VectorMaskFirstTrue mask));
24334   match(Set dst (VectorMaskLastTrue mask));
24335   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24336   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24337   ins_encode %{
24338     int opcode = this->ideal_Opcode();
24339     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24340     int mask_len = Matcher::vector_length(this, $mask);
24341     int mask_size = mask_len * type2aelembytes(mbt);
24342     int vlen_enc = vector_length_encoding(this, $mask);
24343     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24344                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24345   %}
24346   ins_pipe( pipe_slow );
24347 %}
24348 
24349 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24350   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24351   match(Set dst (VectorMaskFirstTrue mask));
24352   match(Set dst (VectorMaskLastTrue mask));
24353   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24354   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24355   ins_encode %{
24356     int opcode = this->ideal_Opcode();
24357     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24358     int mask_len = Matcher::vector_length(this, $mask);
24359     int vlen_enc = vector_length_encoding(this, $mask);
24360     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24361                              $tmp$$Register, mask_len, mbt, vlen_enc);
24362   %}
24363   ins_pipe( pipe_slow );
24364 %}
24365 
24366 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24367   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24368   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24369   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24370   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24371   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24372   ins_encode %{
24373     int opcode = this->ideal_Opcode();
24374     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24375     int mask_len = Matcher::vector_length(this, $mask);
24376     int vlen_enc = vector_length_encoding(this, $mask);
24377     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24378                              $tmp$$Register, mask_len, mbt, vlen_enc);
24379   %}
24380   ins_pipe( pipe_slow );
24381 %}
24382 
24383 // --------------------------------- Compress/Expand Operations ---------------------------
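// CompressV packs the lanes selected by the mask into the low-order lanes of the result, and
// ExpandV performs the inverse, scattering the low-order source lanes into the positions
// selected by the mask; CompressM packs the set bits of a mask register. With AVX512VL (or
// full 512-bit vectors) these generally map onto the VPCOMPRESS/VPEXPAND family, while the
// AVX2 rule below emulates them with a permutation derived from the mask.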
24384 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24385   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24386   match(Set dst (CompressV src mask));
24387   match(Set dst (ExpandV src mask));
24388   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24389   format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24390   ins_encode %{
24391     int opcode = this->ideal_Opcode();
24392     int vlen_enc = vector_length_encoding(this);
24393     BasicType bt  = Matcher::vector_element_basic_type(this);
24394     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24395                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24396   %}
24397   ins_pipe( pipe_slow );
24398 %}
24399 
24400 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24401   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24402   match(Set dst (CompressV src mask));
24403   match(Set dst (ExpandV src mask));
24404   format %{ "vector_compress_expand $dst, $src, $mask" %}
24405   ins_encode %{
24406     int opcode = this->ideal_Opcode();
24407     int vector_len = vector_length_encoding(this);
24408     BasicType bt  = Matcher::vector_element_basic_type(this);
24409     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24410   %}
24411   ins_pipe( pipe_slow );
24412 %}
24413 
24414 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24415   match(Set dst (CompressM mask));
24416   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24417   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24418   ins_encode %{
24419     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24420     int mask_len = Matcher::vector_length(this);
24421     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24422   %}
24423   ins_pipe( pipe_slow );
24424 %}
24425 
24426 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24427 
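// ReverseV reverses the bit order within each lane, ReverseBytesV the byte order. When GFNI is
// available, per-byte bit reversal is done with a single GF2P8AFFINEQB against the constant
// matrix loaded below; otherwise it is emulated using the vector and scalar temporaries.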
24428 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24429   predicate(!VM_Version::supports_gfni());
24430   match(Set dst (ReverseV src));
24431   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24432   format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24433   ins_encode %{
24434     int vec_enc = vector_length_encoding(this);
24435     BasicType bt = Matcher::vector_element_basic_type(this);
24436     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24437                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24438   %}
24439   ins_pipe( pipe_slow );
24440 %}
24441 
24442 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24443   predicate(VM_Version::supports_gfni());
24444   match(Set dst (ReverseV src));
24445   effect(TEMP dst, TEMP xtmp);
24446   format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
24447   ins_encode %{
24448     int vec_enc = vector_length_encoding(this);
24449     BasicType bt  = Matcher::vector_element_basic_type(this);
24450     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24451     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24452                                $xtmp$$XMMRegister);
24453   %}
24454   ins_pipe( pipe_slow );
24455 %}
24456 
24457 instruct vreverse_byte_reg(vec dst, vec src) %{
24458   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24459   match(Set dst (ReverseBytesV src));
24460   effect(TEMP dst);
24461   format %{ "vector_reverse_byte $dst, $src" %}
24462   ins_encode %{
24463     int vec_enc = vector_length_encoding(this);
24464     BasicType bt = Matcher::vector_element_basic_type(this);
24465     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24466   %}
24467   ins_pipe( pipe_slow );
24468 %}
24469 
24470 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24471   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24472   match(Set dst (ReverseBytesV src));
24473   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24474   format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24475   ins_encode %{
24476     int vec_enc = vector_length_encoding(this);
24477     BasicType bt = Matcher::vector_element_basic_type(this);
24478     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24479                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24480   %}
24481   ins_pipe( pipe_slow );
24482 %}
24483 
24484 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24485 
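// CountLeadingZerosV counts the leading zero bits of every lane. Int and long lanes can use the
// AVX512CD VPLZCNTD/VPLZCNTQ instructions directly; subword lanes and pre-AVX-512 targets are
// emulated with the temporaries declared by the rules below.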
24486 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24487   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24488                                               Matcher::vector_length_in_bytes(n->in(1))));
24489   match(Set dst (CountLeadingZerosV src));
24490   format %{ "vector_count_leading_zeros $dst, $src" %}
24491   ins_encode %{
24492     int vlen_enc = vector_length_encoding(this, $src);
24493     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24494     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24495                                        xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24496   %}
24497   ins_pipe( pipe_slow );
24498 %}
24499 
24500 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24501   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24502                                               Matcher::vector_length_in_bytes(n->in(1))));
24503   match(Set dst (CountLeadingZerosV src mask));
24504   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24505   ins_encode %{
24506     int vlen_enc = vector_length_encoding(this, $src);
24507     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24508     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24509     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24510                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24511   %}
24512   ins_pipe( pipe_slow );
24513 %}
24514 
24515 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24516   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24517             VM_Version::supports_avx512cd() &&
24518             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24519   match(Set dst (CountLeadingZerosV src));
24520   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24521   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24522   ins_encode %{
24523     int vlen_enc = vector_length_encoding(this, $src);
24524     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24525     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24526                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24527   %}
24528   ins_pipe( pipe_slow );
24529 %}
24530 
24531 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24532   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24533   match(Set dst (CountLeadingZerosV src));
24534   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24535   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24536   ins_encode %{
24537     int vlen_enc = vector_length_encoding(this, $src);
24538     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24539     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24540                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24541                                        $rtmp$$Register, true, vlen_enc);
24542   %}
24543   ins_pipe( pipe_slow );
24544 %}
24545 
24546 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24547   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24548             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24549   match(Set dst (CountLeadingZerosV src));
24550   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24551   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24552   ins_encode %{
24553     int vlen_enc = vector_length_encoding(this, $src);
24554     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24555     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24556                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24557   %}
24558   ins_pipe( pipe_slow );
24559 %}
24560 
24561 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24562   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24563             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24564   match(Set dst (CountLeadingZerosV src));
24565   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24566   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24567   ins_encode %{
24568     int vlen_enc = vector_length_encoding(this, $src);
24569     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24570     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24571                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24572   %}
24573   ins_pipe( pipe_slow );
24574 %}
24575 
24576 // ---------------------------------- Vector Masked Operations ------------------------------------
24577 
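// The rules in this group match C2's predicated vector nodes: the last input is an AVX-512
// opmask (kReg) and the destination also serves as the first source. Most are emitted through
// evmasked_op with merge-masking enabled (the boolean argument), so lanes whose mask bit is
// clear keep the previous contents of $dst.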
24578 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24579   match(Set dst (AddVB (Binary dst src2) mask));
24580   match(Set dst (AddVS (Binary dst src2) mask));
24581   match(Set dst (AddVI (Binary dst src2) mask));
24582   match(Set dst (AddVL (Binary dst src2) mask));
24583   match(Set dst (AddVF (Binary dst src2) mask));
24584   match(Set dst (AddVD (Binary dst src2) mask));
24585   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24586   ins_encode %{
24587     int vlen_enc = vector_length_encoding(this);
24588     BasicType bt = Matcher::vector_element_basic_type(this);
24589     int opc = this->ideal_Opcode();
24590     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24591                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24592   %}
24593   ins_pipe( pipe_slow );
24594 %}
24595 
24596 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24597   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24598   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24599   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24600   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24601   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24602   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24603   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24604   ins_encode %{
24605     int vlen_enc = vector_length_encoding(this);
24606     BasicType bt = Matcher::vector_element_basic_type(this);
24607     int opc = this->ideal_Opcode();
24608     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24609                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24610   %}
24611   ins_pipe( pipe_slow );
24612 %}
24613 
24614 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24615   match(Set dst (XorV (Binary dst src2) mask));
24616   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24617   ins_encode %{
24618     int vlen_enc = vector_length_encoding(this);
24619     BasicType bt = Matcher::vector_element_basic_type(this);
24620     int opc = this->ideal_Opcode();
24621     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24622                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24623   %}
24624   ins_pipe( pipe_slow );
24625 %}
24626 
24627 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24628   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24629   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24630   ins_encode %{
24631     int vlen_enc = vector_length_encoding(this);
24632     BasicType bt = Matcher::vector_element_basic_type(this);
24633     int opc = this->ideal_Opcode();
24634     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24635                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24636   %}
24637   ins_pipe( pipe_slow );
24638 %}
24639 
24640 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24641   match(Set dst (OrV (Binary dst src2) mask));
24642   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24643   ins_encode %{
24644     int vlen_enc = vector_length_encoding(this);
24645     BasicType bt = Matcher::vector_element_basic_type(this);
24646     int opc = this->ideal_Opcode();
24647     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24648                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24649   %}
24650   ins_pipe( pipe_slow );
24651 %}
24652 
24653 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24654   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24655   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24656   ins_encode %{
24657     int vlen_enc = vector_length_encoding(this);
24658     BasicType bt = Matcher::vector_element_basic_type(this);
24659     int opc = this->ideal_Opcode();
24660     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24661                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24662   %}
24663   ins_pipe( pipe_slow );
24664 %}
24665 
24666 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24667   match(Set dst (AndV (Binary dst src2) mask));
24668   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24669   ins_encode %{
24670     int vlen_enc = vector_length_encoding(this);
24671     BasicType bt = Matcher::vector_element_basic_type(this);
24672     int opc = this->ideal_Opcode();
24673     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24674                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24675   %}
24676   ins_pipe( pipe_slow );
24677 %}
24678 
24679 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24680   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24681   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24682   ins_encode %{
24683     int vlen_enc = vector_length_encoding(this);
24684     BasicType bt = Matcher::vector_element_basic_type(this);
24685     int opc = this->ideal_Opcode();
24686     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24687                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24688   %}
24689   ins_pipe( pipe_slow );
24690 %}
24691 
24692 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24693   match(Set dst (SubVB (Binary dst src2) mask));
24694   match(Set dst (SubVS (Binary dst src2) mask));
24695   match(Set dst (SubVI (Binary dst src2) mask));
24696   match(Set dst (SubVL (Binary dst src2) mask));
24697   match(Set dst (SubVF (Binary dst src2) mask));
24698   match(Set dst (SubVD (Binary dst src2) mask));
24699   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24700   ins_encode %{
24701     int vlen_enc = vector_length_encoding(this);
24702     BasicType bt = Matcher::vector_element_basic_type(this);
24703     int opc = this->ideal_Opcode();
24704     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24705                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24706   %}
24707   ins_pipe( pipe_slow );
24708 %}
24709 
24710 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24711   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24712   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24713   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24714   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24715   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24716   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24717   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24718   ins_encode %{
24719     int vlen_enc = vector_length_encoding(this);
24720     BasicType bt = Matcher::vector_element_basic_type(this);
24721     int opc = this->ideal_Opcode();
24722     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24723                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24724   %}
24725   ins_pipe( pipe_slow );
24726 %}
24727 
24728 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24729   match(Set dst (MulVS (Binary dst src2) mask));
24730   match(Set dst (MulVI (Binary dst src2) mask));
24731   match(Set dst (MulVL (Binary dst src2) mask));
24732   match(Set dst (MulVF (Binary dst src2) mask));
24733   match(Set dst (MulVD (Binary dst src2) mask));
24734   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24735   ins_encode %{
24736     int vlen_enc = vector_length_encoding(this);
24737     BasicType bt = Matcher::vector_element_basic_type(this);
24738     int opc = this->ideal_Opcode();
24739     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24740                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24741   %}
24742   ins_pipe( pipe_slow );
24743 %}
24744 
24745 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24746   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24747   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24748   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24749   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24750   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24751   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24752   ins_encode %{
24753     int vlen_enc = vector_length_encoding(this);
24754     BasicType bt = Matcher::vector_element_basic_type(this);
24755     int opc = this->ideal_Opcode();
24756     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24757                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24758   %}
24759   ins_pipe( pipe_slow );
24760 %}
24761 
24762 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24763   match(Set dst (SqrtVF dst mask));
24764   match(Set dst (SqrtVD dst mask));
24765   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24766   ins_encode %{
24767     int vlen_enc = vector_length_encoding(this);
24768     BasicType bt = Matcher::vector_element_basic_type(this);
24769     int opc = this->ideal_Opcode();
24770     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24771                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24772   %}
24773   ins_pipe( pipe_slow );
24774 %}
24775 
24776 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24777   match(Set dst (DivVF (Binary dst src2) mask));
24778   match(Set dst (DivVD (Binary dst src2) mask));
24779   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24780   ins_encode %{
24781     int vlen_enc = vector_length_encoding(this);
24782     BasicType bt = Matcher::vector_element_basic_type(this);
24783     int opc = this->ideal_Opcode();
24784     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24785                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24786   %}
24787   ins_pipe( pipe_slow );
24788 %}
24789 
24790 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24791   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24792   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24793   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24794   ins_encode %{
24795     int vlen_enc = vector_length_encoding(this);
24796     BasicType bt = Matcher::vector_element_basic_type(this);
24797     int opc = this->ideal_Opcode();
24798     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24799                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24800   %}
24801   ins_pipe( pipe_slow );
24802 %}
24803 
24804 
24805 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24806   match(Set dst (RotateLeftV (Binary dst shift) mask));
24807   match(Set dst (RotateRightV (Binary dst shift) mask));
24808   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24809   ins_encode %{
24810     int vlen_enc = vector_length_encoding(this);
24811     BasicType bt = Matcher::vector_element_basic_type(this);
24812     int opc = this->ideal_Opcode();
24813     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24814                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24815   %}
24816   ins_pipe( pipe_slow );
24817 %}
24818 
24819 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24820   match(Set dst (RotateLeftV (Binary dst src2) mask));
24821   match(Set dst (RotateRightV (Binary dst src2) mask));
24822   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24823   ins_encode %{
24824     int vlen_enc = vector_length_encoding(this);
24825     BasicType bt = Matcher::vector_element_basic_type(this);
24826     int opc = this->ideal_Opcode();
24827     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24828                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24829   %}
24830   ins_pipe( pipe_slow );
24831 %}
24832 
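// Masked shifts come in three flavors: an immediate count, one count shared by all lanes
// (is_var_shift() == false), and a per-lane variable count (is_var_shift() == true). The extra
// trailing boolean passed to evmasked_op selects the variable-shift form.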
24833 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24834   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24835   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24836   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24837   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24838   ins_encode %{
24839     int vlen_enc = vector_length_encoding(this);
24840     BasicType bt = Matcher::vector_element_basic_type(this);
24841     int opc = this->ideal_Opcode();
24842     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24843                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24844   %}
24845   ins_pipe( pipe_slow );
24846 %}
24847 
24848 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24849   predicate(!n->as_ShiftV()->is_var_shift());
24850   match(Set dst (LShiftVS (Binary dst src2) mask));
24851   match(Set dst (LShiftVI (Binary dst src2) mask));
24852   match(Set dst (LShiftVL (Binary dst src2) mask));
24853   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24854   ins_encode %{
24855     int vlen_enc = vector_length_encoding(this);
24856     BasicType bt = Matcher::vector_element_basic_type(this);
24857     int opc = this->ideal_Opcode();
24858     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24859                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24860   %}
24861   ins_pipe( pipe_slow );
24862 %}
24863 
24864 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24865   predicate(n->as_ShiftV()->is_var_shift());
24866   match(Set dst (LShiftVS (Binary dst src2) mask));
24867   match(Set dst (LShiftVI (Binary dst src2) mask));
24868   match(Set dst (LShiftVL (Binary dst src2) mask));
24869   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24870   ins_encode %{
24871     int vlen_enc = vector_length_encoding(this);
24872     BasicType bt = Matcher::vector_element_basic_type(this);
24873     int opc = this->ideal_Opcode();
24874     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24875                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24876   %}
24877   ins_pipe( pipe_slow );
24878 %}
24879 
24880 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24881   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24882   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24883   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24884   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24885   ins_encode %{
24886     int vlen_enc = vector_length_encoding(this);
24887     BasicType bt = Matcher::vector_element_basic_type(this);
24888     int opc = this->ideal_Opcode();
24889     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24890                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24891   %}
24892   ins_pipe( pipe_slow );
24893 %}
24894 
24895 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24896   predicate(!n->as_ShiftV()->is_var_shift());
24897   match(Set dst (RShiftVS (Binary dst src2) mask));
24898   match(Set dst (RShiftVI (Binary dst src2) mask));
24899   match(Set dst (RShiftVL (Binary dst src2) mask));
24900   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24901   ins_encode %{
24902     int vlen_enc = vector_length_encoding(this);
24903     BasicType bt = Matcher::vector_element_basic_type(this);
24904     int opc = this->ideal_Opcode();
24905     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24906                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24907   %}
24908   ins_pipe( pipe_slow );
24909 %}
24910 
24911 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24912   predicate(n->as_ShiftV()->is_var_shift());
24913   match(Set dst (RShiftVS (Binary dst src2) mask));
24914   match(Set dst (RShiftVI (Binary dst src2) mask));
24915   match(Set dst (RShiftVL (Binary dst src2) mask));
24916   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24917   ins_encode %{
24918     int vlen_enc = vector_length_encoding(this);
24919     BasicType bt = Matcher::vector_element_basic_type(this);
24920     int opc = this->ideal_Opcode();
24921     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24922                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24923   %}
24924   ins_pipe( pipe_slow );
24925 %}
24926 
24927 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24928   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24929   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24930   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24931   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24932   ins_encode %{
24933     int vlen_enc = vector_length_encoding(this);
24934     BasicType bt = Matcher::vector_element_basic_type(this);
24935     int opc = this->ideal_Opcode();
24936     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24937                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24938   %}
24939   ins_pipe( pipe_slow );
24940 %}
24941 
24942 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24943   predicate(!n->as_ShiftV()->is_var_shift());
24944   match(Set dst (URShiftVS (Binary dst src2) mask));
24945   match(Set dst (URShiftVI (Binary dst src2) mask));
24946   match(Set dst (URShiftVL (Binary dst src2) mask));
24947   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24948   ins_encode %{
24949     int vlen_enc = vector_length_encoding(this);
24950     BasicType bt = Matcher::vector_element_basic_type(this);
24951     int opc = this->ideal_Opcode();
24952     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24953                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24954   %}
24955   ins_pipe( pipe_slow );
24956 %}
24957 
24958 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24959   predicate(n->as_ShiftV()->is_var_shift());
24960   match(Set dst (URShiftVS (Binary dst src2) mask));
24961   match(Set dst (URShiftVI (Binary dst src2) mask));
24962   match(Set dst (URShiftVL (Binary dst src2) mask));
24963   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24964   ins_encode %{
24965     int vlen_enc = vector_length_encoding(this);
24966     BasicType bt = Matcher::vector_element_basic_type(this);
24967     int opc = this->ideal_Opcode();
24968     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24969                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24970   %}
24971   ins_pipe( pipe_slow );
24972 %}
24973 
24974 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24975   match(Set dst (MaxV (Binary dst src2) mask));
24976   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24977   ins_encode %{
24978     int vlen_enc = vector_length_encoding(this);
24979     BasicType bt = Matcher::vector_element_basic_type(this);
24980     int opc = this->ideal_Opcode();
24981     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24982                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24983   %}
24984   ins_pipe( pipe_slow );
24985 %}
24986 
24987 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24988   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24989   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24990   ins_encode %{
24991     int vlen_enc = vector_length_encoding(this);
24992     BasicType bt = Matcher::vector_element_basic_type(this);
24993     int opc = this->ideal_Opcode();
24994     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24995                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24996   %}
24997   ins_pipe( pipe_slow );
24998 %}
24999 
25000 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
25001   match(Set dst (MinV (Binary dst src2) mask));
25002   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25003   ins_encode %{
25004     int vlen_enc = vector_length_encoding(this);
25005     BasicType bt = Matcher::vector_element_basic_type(this);
25006     int opc = this->ideal_Opcode();
25007     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25008                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25009   %}
25010   ins_pipe( pipe_slow );
25011 %}
25012 
25013 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
25014   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
25015   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25016   ins_encode %{
25017     int vlen_enc = vector_length_encoding(this);
25018     BasicType bt = Matcher::vector_element_basic_type(this);
25019     int opc = this->ideal_Opcode();
25020     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25021                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25022   %}
25023   ins_pipe( pipe_slow );
25024 %}
25025 
25026 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
25027   match(Set dst (VectorRearrange (Binary dst src2) mask));
25028   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
25029   ins_encode %{
25030     int vlen_enc = vector_length_encoding(this);
25031     BasicType bt = Matcher::vector_element_basic_type(this);
25032     int opc = this->ideal_Opcode();
25033     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25034                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25035   %}
25036   ins_pipe( pipe_slow );
25037 %}
25038 
25039 instruct vabs_masked(vec dst, kReg mask) %{
25040   match(Set dst (AbsVB dst mask));
25041   match(Set dst (AbsVS dst mask));
25042   match(Set dst (AbsVI dst mask));
25043   match(Set dst (AbsVL dst mask));
25044   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
25045   ins_encode %{
25046     int vlen_enc = vector_length_encoding(this);
25047     BasicType bt = Matcher::vector_element_basic_type(this);
25048     int opc = this->ideal_Opcode();
25049     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25050                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
25051   %}
25052   ins_pipe( pipe_slow );
25053 %}
25054 
25055 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
25056   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
25057   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
25058   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25059   ins_encode %{
25060     assert(UseFMA, "Needs FMA instructions support.");
25061     int vlen_enc = vector_length_encoding(this);
25062     BasicType bt = Matcher::vector_element_basic_type(this);
25063     int opc = this->ideal_Opcode();
25064     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25065                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
25066   %}
25067   ins_pipe( pipe_slow );
25068 %}
25069 
25070 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
25071   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
25072   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
25073   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25074   ins_encode %{
25075     assert(UseFMA, "Needs FMA instructions support.");
25076     int vlen_enc = vector_length_encoding(this);
25077     BasicType bt = Matcher::vector_element_basic_type(this);
25078     int opc = this->ideal_Opcode();
25079     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25080                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
25081   %}
25082   ins_pipe( pipe_slow );
25083 %}
25084 
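// Predicated vector compare: compares $src1 and $src2 lane-wise under $mask and writes the
// resulting predicate into a mask register, using EVPCMP* for integral lanes and
// EVCMPPS/EVCMPPD for floating-point lanes.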
25085 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
25086   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
25087   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
25088   ins_encode %{
25089     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
25090     int vlen_enc = vector_length_encoding(this, $src1);
25091     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
25092 
25093     // Dispatch the comparison on the element type of the source vectors.
25094     switch (src1_elem_bt) {
25095       case T_BYTE: {
25096         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25097         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25098         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25099         break;
25100       }
25101       case T_SHORT: {
25102         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25103         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25104         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25105         break;
25106       }
25107       case T_INT: {
25108         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25109         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25110         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25111         break;
25112       }
25113       case T_LONG: {
25114         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25115         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25116         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25117         break;
25118       }
25119       case T_FLOAT: {
25120         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25121         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25122         break;
25123       }
25124       case T_DOUBLE: {
25125         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25126         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25127         break;
25128       }
25129       default: assert(false, "%s", type2name(src1_elem_bt)); break;
25130     }
25131   %}
25132   ins_pipe( pipe_slow );
25133 %}
25134 
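// MaskAll replicates a scalar boolean across every lane of a vector mask. The XorVMask rules
// that follow recognize "mask XOR MaskAll(-1)" as a mask negation and emit KNOT.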
25135 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25136   predicate(Matcher::vector_length(n) <= 32);
25137   match(Set dst (MaskAll src));
25138   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
25139   ins_encode %{
25140     int mask_len = Matcher::vector_length(this);
25141     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25142   %}
25143   ins_pipe( pipe_slow );
25144 %}
25145 
25146 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25147   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25148   match(Set dst (XorVMask src (MaskAll cnt)));
25149   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25150   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
25151   ins_encode %{
25152     uint masklen = Matcher::vector_length(this);
25153     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25154   %}
25155   ins_pipe( pipe_slow );
25156 %}
25157 
25158 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25159   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25160             (Matcher::vector_length(n) == 16) ||
25161             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25162   match(Set dst (XorVMask src (MaskAll cnt)));
25163   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25164   ins_encode %{
25165     uint masklen = Matcher::vector_length(this);
25166     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25167   %}
25168   ins_pipe( pipe_slow );
25169 %}
25170 
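// VectorLongToMask turns the low bits of a long into a vector mask: the AVX rules materialize
// it as a vector of booleans, while the EVEX rule simply moves the bits into an opmask register.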
25171 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
25172   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25173   match(Set dst (VectorLongToMask src));
25174   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
25175   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
25176   ins_encode %{
25177     int mask_len = Matcher::vector_length(this);
25178     int vec_enc  = vector_length_encoding(mask_len);
25179     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25180                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25181   %}
25182   ins_pipe( pipe_slow );
25183 %}
25184 
25185 
25186 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25187   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25188   match(Set dst (VectorLongToMask src));
25189   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
25190   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
25191   ins_encode %{
25192     int mask_len = Matcher::vector_length(this);
25193     assert(mask_len <= 32, "invalid mask length");
25194     int vec_enc  = vector_length_encoding(mask_len);
25195     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25196                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25197   %}
25198   ins_pipe( pipe_slow );
25199 %}
25200 
25201 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25202   predicate(n->bottom_type()->isa_vectmask());
25203   match(Set dst (VectorLongToMask src));
25204   format %{ "long_to_mask_evex $dst, $src\t!" %}
25205   ins_encode %{
25206     __ kmov($dst$$KRegister, $src$$Register);
25207   %}
25208   ins_pipe( pipe_slow );
25209 %}
25210 
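// Logical AND/OR/XOR of two mask registers. Without AVX512DQ there is no byte-wide k-register
// operation, so mask lengths below 16 are widened to 16 bits before dispatching.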
25211 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25212   match(Set dst (AndVMask src1 src2));
25213   match(Set dst (OrVMask src1 src2));
25214   match(Set dst (XorVMask src1 src2));
25215   effect(TEMP kscratch);
25216   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25217   ins_encode %{
25218     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25219     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25220     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25221     uint masklen = Matcher::vector_length(this);
25222     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25223     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25224   %}
25225   ins_pipe( pipe_slow );
25226 %}
25227 
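// Masked ternary logic: VPTERNLOG combines $dst, $src2 and $src3 according to the 8-bit truth
// table in $func, merge-masked by $mask.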
25228 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25229   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25230   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25231   ins_encode %{
25232     int vlen_enc = vector_length_encoding(this);
25233     BasicType bt = Matcher::vector_element_basic_type(this);
25234     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25235                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25236   %}
25237   ins_pipe( pipe_slow );
25238 %}
25239 
25240 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25241   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25242   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25243   ins_encode %{
25244     int vlen_enc = vector_length_encoding(this);
25245     BasicType bt = Matcher::vector_element_basic_type(this);
25246     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25247                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25248   %}
25249   ins_pipe( pipe_slow );
25250 %}
25251 
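// CastVV only adjusts the compile-time type of a vector or mask value; no code is emitted.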
25252 instruct castMM(kReg dst)
25253 %{
25254   match(Set dst (CastVV dst));
25255 
25256   size(0);
25257   format %{ "# castVV of $dst" %}
25258   ins_encode(/* empty encoding */);
25259   ins_cost(0);
25260   ins_pipe(empty);
25261 %}
25262 
25263 instruct castVV(vec dst)
25264 %{
25265   match(Set dst (CastVV dst));
25266 
25267   size(0);
25268   format %{ "# castVV of $dst" %}
25269   ins_encode(/* empty encoding */);
25270   ins_cost(0);
25271   ins_pipe(empty);
25272 %}
25273 
25274 instruct castVVLeg(legVec dst)
25275 %{
25276   match(Set dst (CastVV dst));
25277 
25278   size(0);
25279   format %{ "# castVV of $dst" %}
25280   ins_encode(/* empty encoding */);
25281   ins_cost(0);
25282   ins_pipe(empty);
25283 %}
25284 
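// IsInfiniteF/D test a scalar float/double for +/-infinity. VFPCLASSSS/SD with immediate 0x18
// sets the mask bit for positive (0x08) or negative (0x10) infinity.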
25285 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25286 %{
25287   match(Set dst (IsInfiniteF src));
25288   effect(TEMP ktmp, KILL cr);
25289   format %{ "float_class_check $dst, $src" %}
25290   ins_encode %{
25291     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25292     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25293   %}
25294   ins_pipe(pipe_slow);
25295 %}
25296 
25297 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25298 %{
25299   match(Set dst (IsInfiniteD src));
25300   effect(TEMP ktmp, KILL cr);
25301   format %{ "double_class_check $dst, $src" %}
25302   ins_encode %{
25303     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25304     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25305   %}
25306   ins_pipe(pipe_slow);
25307 %}
25308 
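// Saturating vector add/subtract. Byte and short lanes map directly onto the hardware
// VPADDS/VPADDUS/VPSUBS/VPSUBUS forms; int and long lanes have no saturating instructions and
// are emulated with overflow checks, with separate EVEX and AVX fallbacks and signed/unsigned
// variants below.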
25309 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25310 %{
25311   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25312             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25313   match(Set dst (SaturatingAddV src1 src2));
25314   match(Set dst (SaturatingSubV src1 src2));
25315   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25316   ins_encode %{
25317     int vlen_enc = vector_length_encoding(this);
25318     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25319     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25320                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25321   %}
25322   ins_pipe(pipe_slow);
25323 %}
25324 
25325 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25326 %{
25327   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25328             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25329   match(Set dst (SaturatingAddV src1 src2));
25330   match(Set dst (SaturatingSubV src1 src2));
25331   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25332   ins_encode %{
25333     int vlen_enc = vector_length_encoding(this);
25334     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25335     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25336                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25337   %}
25338   ins_pipe(pipe_slow);
25339 %}
25340 
25341 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25342 %{
25343   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25344             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25345             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25346   match(Set dst (SaturatingAddV src1 src2));
25347   match(Set dst (SaturatingSubV src1 src2));
25348   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25349   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25350   ins_encode %{
25351     int vlen_enc = vector_length_encoding(this);
25352     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25353     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25354                                         $src1$$XMMRegister, $src2$$XMMRegister,
25355                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25356                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25357   %}
25358   ins_pipe(pipe_slow);
25359 %}
25360 
25361 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25362 %{
25363   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25364             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25365             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25366   match(Set dst (SaturatingAddV src1 src2));
25367   match(Set dst (SaturatingSubV src1 src2));
25368   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25369   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25370   ins_encode %{
25371     int vlen_enc = vector_length_encoding(this);
25372     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25373     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25374                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25375                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25376   %}
25377   ins_pipe(pipe_slow);
25378 %}
25379 
25380 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25381 %{
25382   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25383             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25384             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25385   match(Set dst (SaturatingAddV src1 src2));
25386   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25387   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25388   ins_encode %{
25389     int vlen_enc = vector_length_encoding(this);
25390     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25391     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25392                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25393   %}
25394   ins_pipe(pipe_slow);
25395 %}
25396 
25397 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25398 %{
25399   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25400             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25401             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25402   match(Set dst (SaturatingAddV src1 src2));
25403   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25404   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25405   ins_encode %{
25406     int vlen_enc = vector_length_encoding(this);
25407     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25408     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25409                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25410   %}
25411   ins_pipe(pipe_slow);
25412 %}
25413 
25414 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25415 %{
25416   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25417             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25418             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25419   match(Set dst (SaturatingSubV src1 src2));
25420   effect(TEMP ktmp);
25421   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25422   ins_encode %{
25423     int vlen_enc = vector_length_encoding(this);
25424     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25425     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25426                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25427   %}
25428   ins_pipe(pipe_slow);
25429 %}
25430 
25431 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25432 %{
25433   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25434             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25435             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25436   match(Set dst (SaturatingSubV src1 src2));
25437   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25438   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25439   ins_encode %{
25440     int vlen_enc = vector_length_encoding(this);
25441     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25442     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25443                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25444   %}
25445   ins_pipe(pipe_slow);
25446 %}
25447 
25448 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25449 %{
25450   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25451             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25452   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25453   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25454   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25455   ins_encode %{
25456     int vlen_enc = vector_length_encoding(this);
25457     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25458     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25459                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25460   %}
25461   ins_pipe(pipe_slow);
25462 %}
25463 
25464 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25465 %{
25466   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25467             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25468   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25469   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25470   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25471   ins_encode %{
25472     int vlen_enc = vector_length_encoding(this);
25473     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25474     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25475                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25476   %}
25477   ins_pipe(pipe_slow);
25478 %}
25479 
25480 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25481   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25482             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25483   match(Set dst (SaturatingAddV (Binary dst src) mask));
25484   match(Set dst (SaturatingSubV (Binary dst src) mask));
25485   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25486   ins_encode %{
25487     int vlen_enc = vector_length_encoding(this);
25488     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25489     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25490                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25491   %}
25492   ins_pipe( pipe_slow );
25493 %}
25494 
25495 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25496   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25497             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25498   match(Set dst (SaturatingAddV (Binary dst src) mask));
25499   match(Set dst (SaturatingSubV (Binary dst src) mask));
25500   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25501   ins_encode %{
25502     int vlen_enc = vector_length_encoding(this);
25503     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25504     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25505                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25506   %}
25507   ins_pipe( pipe_slow );
25508 %}
25509 
25510 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25511   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25512             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25513   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25514   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25515   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25516   ins_encode %{
25517     int vlen_enc = vector_length_encoding(this);
25518     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25519     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25520                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25521   %}
25522   ins_pipe( pipe_slow );
25523 %}
25524 
25525 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25526   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25527             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25528   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25529   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25530   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25531   ins_encode %{
25532     int vlen_enc = vector_length_encoding(this);
25533     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25534     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25535                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25536   %}
25537   ins_pipe( pipe_slow );
25538 %}
25539 
25540 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25541 %{
25542   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25543   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25544   ins_encode %{
25545     int vlen_enc = vector_length_encoding(this);
25546     BasicType bt = Matcher::vector_element_basic_type(this);
25547     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25548   %}
25549   ins_pipe(pipe_slow);
25550 %}
25551 
25552 instruct reinterpretS2HF(regF dst, rRegI src)
25553 %{
25554   match(Set dst (ReinterpretS2HF src));
25555   format %{ "vmovw $dst, $src" %}
25556   ins_encode %{
25557     __ vmovw($dst$$XMMRegister, $src$$Register);
25558   %}
25559   ins_pipe(pipe_slow);
25560 %}
25561 
25562 instruct reinterpretHF2S(rRegI dst, regF src)
25563 %{
25564   match(Set dst (ReinterpretHF2S src));
25565   format %{ "vmovw $dst, $src" %}
25566   ins_encode %{
25567     __ vmovw($dst$$Register, $src$$XMMRegister);
25568   %}
25569   ins_pipe(pipe_slow);
25570 %}
25571 
25572 instruct convF2HFAndS2HF(regF dst, regF src)
25573 %{
25574   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25575   format %{ "convF2HFAndS2HF $dst, $src" %}
25576   ins_encode %{
25577     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25578   %}
25579   ins_pipe(pipe_slow);
25580 %}
25581 
25582 instruct convHF2SAndHF2F(regF dst, regF src)
25583 %{
25584   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25585   format %{ "convHF2SAndHF2F $dst, $src" %}
25586   ins_encode %{
25587     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25588   %}
25589   ins_pipe(pipe_slow);
25590 %}
25591 
25592 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25593 %{
25594   match(Set dst (SqrtHF src));
25595   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25596   ins_encode %{
25597     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25598   %}
25599   ins_pipe(pipe_slow);
25600 %}
25601 
25602 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25603 %{
25604   match(Set dst (AddHF src1 src2));
25605   match(Set dst (DivHF src1 src2));
25606   match(Set dst (MulHF src1 src2));
25607   match(Set dst (SubHF src1 src2));
25608   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25609   ins_encode %{
25610     int opcode = this->ideal_Opcode();
25611     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25612   %}
25613   ins_pipe(pipe_slow);
25614 %}
25615 
25616 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25617 %{
25618   predicate(VM_Version::supports_avx10_2());
25619   match(Set dst (MaxHF src1 src2));
25620   match(Set dst (MinHF src1 src2));
25621   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25622   ins_encode %{
25623     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25624     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25625   %}
25626   ins_pipe( pipe_slow );
25627 %}
25628 
25629 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25630 %{
25631   predicate(!VM_Version::supports_avx10_2());
25632   match(Set dst (MaxHF src1 src2));
25633   match(Set dst (MinHF src1 src2));
25634   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25635   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25636   ins_encode %{
25637     int opcode = this->ideal_Opcode();
25638     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25639                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25640   %}
25641   ins_pipe( pipe_slow );
25642 %}
25643 
25644 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25645 %{
25646   match(Set dst (FmaHF  src2 (Binary dst src1)));
25647   effect(DEF dst);
25648   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25649   ins_encode %{
25650     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25651   %}
25652   ins_pipe( pipe_slow );
25653 %}
25654 
25655 
25656 instruct vector_sqrt_HF_reg(vec dst, vec src)
25657 %{
25658   match(Set dst (SqrtVHF src));
25659   format %{ "vector_sqrt_fp16 $dst, $src" %}
25660   ins_encode %{
25661     int vlen_enc = vector_length_encoding(this);
25662     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25663   %}
25664   ins_pipe(pipe_slow);
25665 %}
25666 
25667 instruct vector_sqrt_HF_mem(vec dst, memory src)
25668 %{
25669   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25670   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25671   ins_encode %{
25672     int vlen_enc = vector_length_encoding(this);
25673     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25674   %}
25675   ins_pipe(pipe_slow);
25676 %}
25677 
25678 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25679 %{
25680   match(Set dst (AddVHF src1 src2));
25681   match(Set dst (DivVHF src1 src2));
25682   match(Set dst (MulVHF src1 src2));
25683   match(Set dst (SubVHF src1 src2));
25684   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25685   ins_encode %{
25686     int vlen_enc = vector_length_encoding(this);
25687     int opcode = this->ideal_Opcode();
25688     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25689   %}
25690   ins_pipe(pipe_slow);
25691 %}
25692 
25693 
25694 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25695 %{
25696   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25697   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25698   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25699   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25700   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25701   ins_encode %{
25702     int vlen_enc = vector_length_encoding(this);
25703     int opcode = this->ideal_Opcode();
25704     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25705   %}
25706   ins_pipe(pipe_slow);
25707 %}
25708 
25709 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25710 %{
25711   match(Set dst (FmaVHF src2 (Binary dst src1)));
25712   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25713   ins_encode %{
25714     int vlen_enc = vector_length_encoding(this);
25715     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25716   %}
25717   ins_pipe( pipe_slow );
25718 %}
25719 
25720 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25721 %{
25722   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25723   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25724   ins_encode %{
25725     int vlen_enc = vector_length_encoding(this);
25726     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25727   %}
25728   ins_pipe( pipe_slow );
25729 %}
25730 
25731 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25732 %{
25733   predicate(VM_Version::supports_avx10_2());
25734   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25735   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25736   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25737   ins_encode %{
25738     int vlen_enc = vector_length_encoding(this);
25739     int function =  this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25740     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25741   %}
25742   ins_pipe( pipe_slow );
25743 %}
25744 
25745 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25746 %{
25747   predicate(VM_Version::supports_avx10_2());
25748   match(Set dst (MinVHF src1 src2));
25749   match(Set dst (MaxVHF src1 src2));
25750   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25751   ins_encode %{
25752     int vlen_enc = vector_length_encoding(this);
25753     int function =  this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25754     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25755   %}
25756   ins_pipe( pipe_slow );
25757 %}
25758 
25759 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25760 %{
25761   predicate(!VM_Version::supports_avx10_2());
25762   match(Set dst (MinVHF src1 src2));
25763   match(Set dst (MaxVHF src1 src2));
25764   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25765   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25766   ins_encode %{
25767     int vlen_enc = vector_length_encoding(this);
25768     int opcode = this->ideal_Opcode();
25769     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25770                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25771   %}
25772   ins_pipe( pipe_slow );
25773 %}
25774 
25775 //----------PEEPHOLE RULES-----------------------------------------------------
25776 // These must follow all instruction definitions as they use the names
25777 // defined in the instructions definitions.
25778 //
25779 // peeppredicate ( rule_predicate );
// // the rule is ignored unless this predicate holds
25781 //
25782 // peepmatch ( root_instr_name [preceding_instruction]* );
25783 //
25784 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, functions that, when invoked, return a new
// // node defined in peepreplace, and the rule numbers of the nodes appearing
// // in the corresponding peepmatch. The procedure returns true if successful,
// // else false.
25793 //
25794 // peepconstraint %{
25795 // (instruction_number.operand_name relational_op instruction_number.operand_name
25796 //  [, ...] );
25797 // // instruction numbers are zero-based using left to right order in peepmatch
25798 //
25799 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25800 // // provide an instruction_number.operand_name for each operand that appears
25801 // // in the replacement instruction's match rule
25802 //
25803 // ---------VM FLAGS---------------------------------------------------------
25804 //
25805 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25806 //
25807 // Each peephole rule is given an identifying number starting with zero and
25808 // increasing by one in the order seen by the parser.  An individual peephole
25809 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25810 // on the command-line.
25811 //
25812 // ---------CURRENT LIMITATIONS----------------------------------------------
25813 //
25814 // Only transformations inside a basic block (do we need more for peephole)
25815 //
25816 // ---------EXAMPLE----------------------------------------------------------
25817 //
25818 // // pertinent parts of existing instructions in architecture description
25819 // instruct movI(rRegI dst, rRegI src)
25820 // %{
25821 //   match(Set dst (CopyI src));
25822 // %}
25823 //
25824 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25825 // %{
25826 //   match(Set dst (AddI dst src));
25827 //   effect(KILL cr);
25828 // %}
25829 //
25830 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25831 // %{
25832 //   match(Set dst (AddI dst src));
25833 // %}
25834 //
25835 // 1. Simple replacement
25836 // - Only match adjacent instructions in same basic block
25837 // - Only equality constraints
25838 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25839 // - Only one replacement instruction
25840 //
25841 // // Change (inc mov) to lea
25842 // peephole %{
25843 //   // lea should only be emitted when beneficial
25844 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25845 //   // increment preceded by register-register move
25846 //   peepmatch ( incI_rReg movI );
25847 //   // require that the destination register of the increment
25848 //   // match the destination register of the move
25849 //   peepconstraint ( 0.dst == 1.dst );
25850 //   // construct a replacement instruction that sets
25851 //   // the destination to ( move's source register + one )
25852 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25853 // %}
25854 //
25855 // 2. Procedural replacement
// - More flexible finding of relevant nodes
25857 // - More flexible constraints
25858 // - More flexible transformations
25859 // - May utilise architecture-dependent API more effectively
25860 // - Currently only one replacement instruction due to adlc parsing capabilities
25861 //
25862 // // Change (inc mov) to lea
25863 // peephole %{
25864 //   // lea should only be emitted when beneficial
25865 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the nodes matched here are passed into the function below
25867 //   peepmatch ( incI_rReg movI );
25868 //   // the method that takes the responsibility of transformation
25869 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
//   // this node is passed into the function above
25872 //   peepreplace ( leaI_rReg_immI() );
25873 // %}
25874 
// These instructions are not matched by the matcher but are used by the peephole rules below
25876 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25877 %{
25878   predicate(false);
25879   match(Set dst (AddI src1 src2));
25880   format %{ "leal    $dst, [$src1 + $src2]" %}
25881   ins_encode %{
25882     Register dst = $dst$$Register;
25883     Register src1 = $src1$$Register;
25884     Register src2 = $src2$$Register;
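    // Explanatory note (assumption about the encoding choice): rbp and r13 cannot
    // be encoded as a base register without an extra displacement byte, so swap
    // base and index in that case to keep the lea encoding short.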
25885     if (src1 != rbp && src1 != r13) {
25886       __ leal(dst, Address(src1, src2, Address::times_1));
25887     } else {
25888       assert(src2 != rbp && src2 != r13, "");
25889       __ leal(dst, Address(src2, src1, Address::times_1));
25890     }
25891   %}
25892   ins_pipe(ialu_reg_reg);
25893 %}
25894 
25895 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25896 %{
25897   predicate(false);
25898   match(Set dst (AddI src1 src2));
25899   format %{ "leal    $dst, [$src1 + $src2]" %}
25900   ins_encode %{
25901     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25902   %}
25903   ins_pipe(ialu_reg_reg);
25904 %}
25905 
25906 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25907 %{
25908   predicate(false);
25909   match(Set dst (LShiftI src shift));
25910   format %{ "leal    $dst, [$src << $shift]" %}
25911   ins_encode %{
25912     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25913     Register src = $src$$Register;
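    // Explanatory note: for a shift by 1 (times_2), prefer [src + src] over a
    // scaled index with no base register, since the latter forces a 32-bit zero
    // displacement.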
25914     if (scale == Address::times_2 && src != rbp && src != r13) {
25915       __ leal($dst$$Register, Address(src, src, Address::times_1));
25916     } else {
25917       __ leal($dst$$Register, Address(noreg, src, scale));
25918     }
25919   %}
25920   ins_pipe(ialu_reg_reg);
25921 %}
25922 
25923 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25924 %{
25925   predicate(false);
25926   match(Set dst (AddL src1 src2));
25927   format %{ "leaq    $dst, [$src1 + $src2]" %}
25928   ins_encode %{
25929     Register dst = $dst$$Register;
25930     Register src1 = $src1$$Register;
25931     Register src2 = $src2$$Register;
25932     if (src1 != rbp && src1 != r13) {
25933       __ leaq(dst, Address(src1, src2, Address::times_1));
25934     } else {
25935       assert(src2 != rbp && src2 != r13, "");
25936       __ leaq(dst, Address(src2, src1, Address::times_1));
25937     }
25938   %}
25939   ins_pipe(ialu_reg_reg);
25940 %}
25941 
25942 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25943 %{
25944   predicate(false);
25945   match(Set dst (AddL src1 src2));
25946   format %{ "leaq    $dst, [$src1 + $src2]" %}
25947   ins_encode %{
25948     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25949   %}
25950   ins_pipe(ialu_reg_reg);
25951 %}
25952 
25953 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25954 %{
25955   predicate(false);
25956   match(Set dst (LShiftL src shift));
25957   format %{ "leaq    $dst, [$src << $shift]" %}
25958   ins_encode %{
25959     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25960     Register src = $src$$Register;
25961     if (scale == Address::times_2 && src != rbp && src != r13) {
25962       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25963     } else {
25964       __ leaq($dst$$Register, Address(noreg, src, scale));
25965     }
25966   %}
25967   ins_pipe(ialu_reg_reg);
25968 %}
25969 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally
// beneficial only on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
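//
// For illustration (register names are symbolic), the reg-reg rule effectively
// rewrites a sequence such as
//
//   movl  dst, src1
//   addl  dst, src2
//
// into a single
//
//   leal  dst, [src1 + src2]
//
// folding the move into the address computation.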
25976 
25977 peephole
25978 %{
25979   peeppredicate(VM_Version::supports_fast_2op_lea());
25980   peepmatch (addI_rReg);
25981   peepprocedure (lea_coalesce_reg);
25982   peepreplace (leaI_rReg_rReg_peep());
25983 %}
25984 
25985 peephole
25986 %{
25987   peeppredicate(VM_Version::supports_fast_2op_lea());
25988   peepmatch (addI_rReg_imm);
25989   peepprocedure (lea_coalesce_imm);
25990   peepreplace (leaI_rReg_immI_peep());
25991 %}
25992 
25993 peephole
25994 %{
25995   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25996                 VM_Version::is_intel_cascade_lake());
25997   peepmatch (incI_rReg);
25998   peepprocedure (lea_coalesce_imm);
25999   peepreplace (leaI_rReg_immI_peep());
26000 %}
26001 
26002 peephole
26003 %{
26004   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26005                 VM_Version::is_intel_cascade_lake());
26006   peepmatch (decI_rReg);
26007   peepprocedure (lea_coalesce_imm);
26008   peepreplace (leaI_rReg_immI_peep());
26009 %}
26010 
26011 peephole
26012 %{
26013   peeppredicate(VM_Version::supports_fast_2op_lea());
26014   peepmatch (salI_rReg_immI2);
26015   peepprocedure (lea_coalesce_imm);
26016   peepreplace (leaI_rReg_immI2_peep());
26017 %}
26018 
26019 peephole
26020 %{
26021   peeppredicate(VM_Version::supports_fast_2op_lea());
26022   peepmatch (addL_rReg);
26023   peepprocedure (lea_coalesce_reg);
26024   peepreplace (leaL_rReg_rReg_peep());
26025 %}
26026 
26027 peephole
26028 %{
26029   peeppredicate(VM_Version::supports_fast_2op_lea());
26030   peepmatch (addL_rReg_imm);
26031   peepprocedure (lea_coalesce_imm);
26032   peepreplace (leaL_rReg_immL32_peep());
26033 %}
26034 
26035 peephole
26036 %{
26037   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26038                 VM_Version::is_intel_cascade_lake());
26039   peepmatch (incL_rReg);
26040   peepprocedure (lea_coalesce_imm);
26041   peepreplace (leaL_rReg_immL32_peep());
26042 %}
26043 
26044 peephole
26045 %{
26046   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26047                 VM_Version::is_intel_cascade_lake());
26048   peepmatch (decL_rReg);
26049   peepprocedure (lea_coalesce_imm);
26050   peepreplace (leaL_rReg_immL32_peep());
26051 %}
26052 
26053 peephole
26054 %{
26055   peeppredicate(VM_Version::supports_fast_2op_lea());
26056   peepmatch (salL_rReg_immI2);
26057   peepprocedure (lea_coalesce_imm);
26058   peepreplace (leaL_rReg_immI2_peep());
26059 %}
26060 
26061 peephole
26062 %{
26063   peepmatch (leaPCompressedOopOffset);
26064   peepprocedure (lea_remove_redundant);
26065 %}
26066 
26067 peephole
26068 %{
26069   peepmatch (leaP8Narrow);
26070   peepprocedure (lea_remove_redundant);
26071 %}
26072 
26073 peephole
26074 %{
26075   peepmatch (leaP32Narrow);
26076   peepprocedure (lea_remove_redundant);
26077 %}
26078 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV)
// only use flags that are already set by the previous instruction.
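//
// For illustration, in a sequence such as
//
//   andl  rax, rbx     // already sets ZF and SF from the result
//   testl rax, rax     // redundant if only ZF/SF are consumed
//   je    somewhere
//
// the testl can be removed because the conditional jump only reads flags that
// the preceding andl has already produced.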
26081 
// int variant
26083 peephole
26084 %{
26085   peepmatch (testI_reg);
26086   peepprocedure (test_may_remove);
26087 %}
26088 
// long variant
26090 peephole
26091 %{
26092   peepmatch (testL_reg);
26093   peepprocedure (test_may_remove);
26094 %}
26095 
26096 
26097 //----------SMARTSPILL RULES---------------------------------------------------
26098 // These must follow all instruction definitions as they use the names
26099 // defined in the instructions definitions.