//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
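//
// As an example, the first definition below declares RAX save-on-call for
// both the Java and C calling conventions, spilled as an int (Op_RegI),
// with encoding 0:
//
//   reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());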

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
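// The APX extended registers R16-R31 go further and require the extended
// encodings (REX2 or EVEX) rather than plain REX.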

// RBX, RSI, and RDI were previously set as save-on-entry for Java code,
// then SOE was turned off in Java code due to frequent use of
// uncommon-traps.  Now that the allocator is better, RSI and RDI are
// turned back on as SOE registers.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

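// Following that heuristic, chunk0 below lists the volatile scratch
// registers (R10, R11, R8, R9) first and RSP, which is fixed as the stack
// pointer, last.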
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
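//
// In the definitions below, word (a) alone holds a Float (e.g. XMM0),
// words (a)-(b) together hold a Double (XMM0, XMM0b), and all sixteen
// words (a)-(p) make up one full 512-bit vector register.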

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
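// (k0 is deliberately left out: in EVEX encodings a mask-field value of 0
// means "no masking", so k0 cannot serve as an allocatable mask register.)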
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
// (RSP and R15 are excluded: RSP is the stack pointer and R15 holds the
// current thread; see the TLS pointer class below.)
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
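// Note: register classes with a %{ ... %} body are dynamic; the embedded
// C++ statement returns a RegMask computed at VM startup, which is how
// optional registers (e.g. the APX GPRs R16-R31) can be included only
// when the CPU actually supports them.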

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
// The flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
// Class for pre-EVEX float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
// Class for EVEX float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
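// A reg_class_dynamic selects between two statically defined classes at
// runtime: the first class is used when the trailing %{ ... %} predicate
// holds, the second otherwise.  For example, float_reg below resolves to
// float_reg_evex (XMM0-XMM31) on machines with EVEX support and to
// float_reg_legacy (XMM0-XMM15) everywhere else.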
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
// Class for pre-EVEX double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
// Class for EVEX double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre-EVEX 32-bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
// Class for EVEX 32-bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre-EVEX 64-bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for EVEX 64-bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre-EVEX 128-bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for EVEX 128-bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre-EVEX 256-bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for EVEX 256-bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for EVEX 512-bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for 512-bit vector registers, restricted to the lower 16 XMM registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
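// Returns true iff both bounds of the CastLL's long type are either
// unbounded (min_jlong/max_jlong) or representable as a signed 32-bit
// immediate, allowing matching rules to encode the bounds with imm32
// operands.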
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
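// R12 holds the compressed-oops heap base, so it must stay out of the
// allocatable register masks whenever compressed oops are in use.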
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
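  // r16-r31 are the APX extended general-purpose registers; when UseAPX is
  // off they are stripped from the pointer and int masks built below.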
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
 1628 }
 1629 
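// vzeroupper encodes as C5 F8 77, hence the 3-byte size returned below.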
 1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // size of vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
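// A direct call is opcode 0xE8 plus a 4-byte rel32 displacement, giving
// the 5 bytes accounted for below.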
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
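// 15 bytes = 10-byte movq imm64 (loading the inline-cache value) plus the
// 5-byte call rel32.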
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
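// 13 bytes = 10-byte movq r10, imm64 plus the 3-byte indirect callq (r10).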
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
// The 4-byte displacement of the call instruction must be 4-byte aligned
// so that it does not span a cache line and can be patched atomically.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
// The 4-byte displacement of the call instruction must be 4-byte aligned
// so that it does not span a cache line and can be patched atomically.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;
 1683 }
 1684 
// This could be in MacroAssembler, but it's fairly C2-specific.
 1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1687   Label exit;
 1688   __ jccb(Assembler::noParity, exit);
 1689   __ pushf();
 1690   //
 1691   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1692   // zero OF,AF,SF for NaN values.
 1693   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1694   // values returns 'less than' result (CF is set).
 1695   // Leave the rest of flags unchanged.
 1696   //
 1697   //    7 6 5 4 3 2 1 0
 1698   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1699   //    0 0 1 0 1 0 1 1   (0x2B)
 1700   //
 1701   __ andq(Address(rsp, 0), 0xffffff2b);
 1702   __ popf();
 1703   __ bind(exit);
 1704 }
 1705 
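// Materializes the three-way compare result: dst is -1 when the operands
// are unordered (NaN) or a < b, 0 when a == b, and 1 when a > b.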
 1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1707   Label done;
 1708   __ movl(dst, -1);
 1709   __ jcc(Assembler::parity, done);
 1710   __ jcc(Assembler::below, done);
 1711   __ setcc(Assembler::notEqual, dst);
 1712   __ bind(done);
 1713 }
 1714 
 1715 // Math.min()    # Math.max()
 1716 // --------------------------
 1717 // ucomis[s/d]   #
 1718 // ja   -> b     # a
 1719 // jp   -> NaN   # NaN
 1720 // jb   -> a     # b
 1721 // je            #
 1722 // |-jz -> a | b # a & b
 1723 // |    -> a     #
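// When the operands compare equal and are zero, the sign bits are merged:
// OR-ing them (vpor) makes min(+0.0, -0.0) return -0.0, while AND-ing them
// (vpand) makes max(-0.0, +0.0) return +0.0, as Java's Math.min/max require.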
 1724 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1725                             XMMRegister a, XMMRegister b,
 1726                             XMMRegister xmmt, Register rt,
 1727                             bool min, bool single) {
 1728 
 1729   Label nan, zero, below, above, done;
 1730 
 1731   if (single)
 1732     __ ucomiss(a, b);
 1733   else
 1734     __ ucomisd(a, b);
 1735 
 1736   if (dst->encoding() != (min ? b : a)->encoding())
 1737     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1738   else
 1739     __ jccb(Assembler::above, done);
 1740 
 1741   __ jccb(Assembler::parity, nan);  // PF=1
 1742   __ jccb(Assembler::below, below); // CF=1
 1743 
 1744   // equal
 1745   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1746   if (single) {
 1747     __ ucomiss(a, xmmt);
 1748     __ jccb(Assembler::equal, zero);
 1749 
 1750     __ movflt(dst, a);
 1751     __ jmp(done);
 1752   }
 1753   else {
 1754     __ ucomisd(a, xmmt);
 1755     __ jccb(Assembler::equal, zero);
 1756 
 1757     __ movdbl(dst, a);
 1758     __ jmp(done);
 1759   }
 1760 
 1761   __ bind(zero);
 1762   if (min)
 1763     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1764   else
 1765     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1766 
 1767   __ jmp(done);
 1768 
 1769   __ bind(above);
 1770   if (single)
 1771     __ movflt(dst, min ? b : a);
 1772   else
 1773     __ movdbl(dst, min ? b : a);
 1774 
 1775   __ jmp(done);
 1776 
 1777   __ bind(nan);
 1778   if (single) {
 1779     __ movl(rt, 0x7fc00000); // Float.NaN
 1780     __ movdl(dst, rt);
 1781   }
 1782   else {
 1783     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1784     __ movdq(dst, rt);
 1785   }
 1786   __ jmp(done);
 1787 
 1788   __ bind(below);
 1789   if (single)
 1790     __ movflt(dst, min ? a : b);
 1791   else
 1792     __ movdbl(dst, min ? a : b);
 1793 
 1794   __ bind(done);
 1795 }
 1796 
 1797 //=============================================================================
 1798 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1799 
 1800 int ConstantTable::calculate_table_base_offset() const {
 1801   return 0;  // absolute addressing, no offset
 1802 }
 1803 
 1804 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1805 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1806   ShouldNotReachHere();
 1807 }
 1808 
 1809 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1810   // Empty encoding
 1811 }
 1812 
 1813 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1814   return 0;
 1815 }
 1816 
 1817 #ifndef PRODUCT
 1818 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1819   st->print("# MachConstantBaseNode (empty encoding)");
 1820 }
 1821 #endif
 1822 
 1823 
 1824 //=============================================================================
 1825 #ifndef PRODUCT
 1826 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1827   Compile* C = ra_->C;
 1828 
 1829   int framesize = C->output()->frame_size_in_bytes();
 1830   int bangsize = C->output()->bang_size_in_bytes();
 1831   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1832   // Remove wordSize for return addr which is already pushed.
 1833   framesize -= wordSize;
 1834 
 1835   if (C->output()->need_stack_bang(bangsize)) {
 1836     framesize -= wordSize;
 1837     st->print("# stack bang (%d bytes)", bangsize);
 1838     st->print("\n\t");
 1839     st->print("pushq   rbp\t# Save rbp");
 1840     if (PreserveFramePointer) {
 1841         st->print("\n\t");
 1842         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1843     }
 1844     if (framesize) {
 1845       st->print("\n\t");
 1846       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1847     }
 1848   } else {
 1849     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1850     st->print("\n\t");
 1851     framesize -= wordSize;
 1852     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1853     if (PreserveFramePointer) {
 1854       st->print("\n\t");
 1855       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1856       if (framesize > 0) {
 1857         st->print("\n\t");
 1858         st->print("addq    rbp, #%d", framesize);
 1859       }
 1860     }
 1861   }
 1862 
 1863   if (VerifyStackAtCalls) {
 1864     st->print("\n\t");
 1865     framesize -= wordSize;
 1866     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1867 #ifdef ASSERT
 1868     st->print("\n\t");
 1869     st->print("# stack alignment check");
 1870 #endif
 1871   }
 1872   if (C->stub_function() != nullptr) {
 1873     st->print("\n\t");
 1874     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1875     st->print("\n\t");
 1876     st->print("je      fast_entry\t");
 1877     st->print("\n\t");
 1878     st->print("call    #nmethod_entry_barrier_stub\t");
 1879     st->print("\n\tfast_entry:");
 1880   }
 1881   st->cr();
 1882 }
 1883 #endif
 1884 
 1885 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1886   Compile* C = ra_->C;
 1887 
 1888   __ verified_entry(C);
 1889 
 1890   if (ra_->C->stub_function() == nullptr) {
 1891     __ entry_barrier();
 1892   }
 1893 
 1894   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1895     __ bind(*_verified_entry);
 1896   }
 1897 
 1898   C->output()->set_frame_complete(__ offset());
 1899 
 1900   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the constant table base offset here because uses of the
    // constants might be emitted before MachConstantBaseNode itself.
 1903     ConstantTable& constant_table = C->output()->constant_table();
 1904     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1905   }
 1906 }
 1907 
 1908 
 1909 int MachPrologNode::reloc() const
 1910 {
 1911   return 0; // a large enough number
 1912 }
 1913 
 1914 //=============================================================================
 1915 #ifndef PRODUCT
 1916 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1917 {
 1918   Compile* C = ra_->C;
 1919   if (generate_vzeroupper(C)) {
 1920     st->print("vzeroupper");
 1921     st->cr(); st->print("\t");
 1922   }
 1923 
 1924   int framesize = C->output()->frame_size_in_bytes();
 1925   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove a word for the return address, which is already pushed,
  // and another for RBP.
 1928   framesize -= 2*wordSize;
 1929 
 1930   if (framesize) {
 1931     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1932     st->print("\t");
 1933   }
 1934 
 1935   st->print_cr("popq    rbp");
 1936   if (do_polling() && C->is_method_compilation()) {
 1937     st->print("\t");
 1938     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1939                  "ja      #safepoint_stub\t"
 1940                  "# Safepoint: poll for GC");
 1941   }
 1942 }
 1943 #endif
 1944 
 1945 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1946 {
 1947   Compile* C = ra_->C;
 1948 
 1949   if (generate_vzeroupper(C)) {
 1950     // Clear upper bits of YMM registers when current compiled code uses
 1951     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1952     __ vzeroupper();
 1953   }
 1954 
 1955   // Subtract two words to account for return address and rbp
 1956   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1957   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1958 
 1959   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1960     __ reserved_stack_check();
 1961   }
 1962 
 1963   if (do_polling() && C->is_method_compilation()) {
 1964     Label dummy_label;
 1965     Label* code_stub = &dummy_label;
 1966     if (!C->output()->in_scratch_emit_size()) {
 1967       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1968       C->output()->add_stub(stub);
 1969       code_stub = &stub->entry();
 1970     }
 1971     __ relocate(relocInfo::poll_return_type);
 1972     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1973   }
 1974 }
 1975 
 1976 int MachEpilogNode::reloc() const
 1977 {
 1978   return 2; // a large enough number
 1979 }
 1980 
 1981 const Pipeline* MachEpilogNode::pipeline() const
 1982 {
 1983   return MachNode::pipeline_class();
 1984 }
 1985 
 1986 //=============================================================================
 1987 
 1988 enum RC {
 1989   rc_bad,
 1990   rc_int,
 1991   rc_kreg,
 1992   rc_float,
 1993   rc_stack
 1994 };
 1995 
 1996 static enum RC rc_class(OptoReg::Name reg)
 1997 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 1999 
 2000   if (OptoReg::is_stack(reg)) return rc_stack;
 2001 
 2002   VMReg r = OptoReg::as_VMReg(reg);
 2003 
 2004   if (r->is_Register()) return rc_int;
 2005 
 2006   if (r->is_KRegister()) return rc_kreg;
 2007 
 2008   assert(r->is_XMMRegister(), "must be");
 2009   return rc_float;
 2010 }
 2011 
 2012 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2013 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2014                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2015 
 2016 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2017                      int stack_offset, int reg, uint ireg, outputStream* st);
 2018 
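// Stack-to-stack moves have no free scratch register, so the helper below
// borrows rax (32-bit moves) or xmm0 (256/512-bit moves) and parks the old
// value just below rsp for the duration.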
 2019 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2020                                       int dst_offset, uint ireg, outputStream* st) {
 2021   if (masm) {
 2022     switch (ireg) {
 2023     case Op_VecS:
 2024       __ movq(Address(rsp, -8), rax);
 2025       __ movl(rax, Address(rsp, src_offset));
 2026       __ movl(Address(rsp, dst_offset), rax);
 2027       __ movq(rax, Address(rsp, -8));
 2028       break;
 2029     case Op_VecD:
 2030       __ pushq(Address(rsp, src_offset));
 2031       __ popq (Address(rsp, dst_offset));
 2032       break;
 2033     case Op_VecX:
 2034       __ pushq(Address(rsp, src_offset));
 2035       __ popq (Address(rsp, dst_offset));
 2036       __ pushq(Address(rsp, src_offset+8));
 2037       __ popq (Address(rsp, dst_offset+8));
 2038       break;
 2039     case Op_VecY:
 2040       __ vmovdqu(Address(rsp, -32), xmm0);
 2041       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2042       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2043       __ vmovdqu(xmm0, Address(rsp, -32));
 2044       break;
 2045     case Op_VecZ:
 2046       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2047       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2048       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2049       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2050       break;
 2051     default:
 2052       ShouldNotReachHere();
 2053     }
 2054 #ifndef PRODUCT
 2055   } else {
 2056     switch (ireg) {
 2057     case Op_VecS:
 2058       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2059                 "movl    rax, [rsp + #%d]\n\t"
 2060                 "movl    [rsp + #%d], rax\n\t"
 2061                 "movq    rax, [rsp - #8]",
 2062                 src_offset, dst_offset);
 2063       break;
 2064     case Op_VecD:
 2065       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2066                 "popq    [rsp + #%d]",
 2067                 src_offset, dst_offset);
 2068       break;
 2069      case Op_VecX:
 2070       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2071                 "popq    [rsp + #%d]\n\t"
 2072                 "pushq   [rsp + #%d]\n\t"
 2073                 "popq    [rsp + #%d]",
 2074                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2075       break;
 2076     case Op_VecY:
 2077       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2078                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2079                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2080                 "vmovdqu xmm0, [rsp - #32]",
 2081                 src_offset, dst_offset);
 2082       break;
 2083     case Op_VecZ:
      st->print("evmovdquq [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "evmovdquq xmm0, [rsp + #%d]\n\t"
                "evmovdquq [rsp + #%d], xmm0\n\t"
                "evmovdquq xmm0, [rsp - #64]",
 2088                 src_offset, dst_offset);
 2089       break;
 2090     default:
 2091       ShouldNotReachHere();
 2092     }
 2093 #endif
 2094   }
 2095 }
 2096 
 2097 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2098                                        PhaseRegAlloc* ra_,
 2099                                        bool do_size,
 2100                                        outputStream* st) const {
 2101   assert(masm != nullptr || st  != nullptr, "sanity");
 2102   // Get registers to move
 2103   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2104   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2105   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2106   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2107 
 2108   enum RC src_second_rc = rc_class(src_second);
 2109   enum RC src_first_rc = rc_class(src_first);
 2110   enum RC dst_second_rc = rc_class(dst_second);
 2111   enum RC dst_first_rc = rc_class(dst_first);
 2112 
 2113   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register");
 2115 
 2116   if (src_first == dst_first && src_second == dst_second) {
 2117     // Self copy, no move
 2118     return 0;
 2119   }
 2120   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2121     uint ireg = ideal_reg();
 2122     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2123     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2125       // mem -> mem
 2126       int src_offset = ra_->reg2offset(src_first);
 2127       int dst_offset = ra_->reg2offset(dst_first);
 2128       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2130       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2132       int stack_offset = ra_->reg2offset(dst_first);
 2133       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2135       int stack_offset = ra_->reg2offset(src_first);
 2136       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2137     } else {
 2138       ShouldNotReachHere();
 2139     }
 2140     return 0;
 2141   }
 2142   if (src_first_rc == rc_stack) {
 2143     // mem ->
 2144     if (dst_first_rc == rc_stack) {
 2145       // mem -> mem
 2146       assert(src_second != dst_first, "overlap");
 2147       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2148           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2149         // 64-bit
 2150         int src_offset = ra_->reg2offset(src_first);
 2151         int dst_offset = ra_->reg2offset(dst_first);
 2152         if (masm) {
 2153           __ pushq(Address(rsp, src_offset));
 2154           __ popq (Address(rsp, dst_offset));
 2155 #ifndef PRODUCT
 2156         } else {
 2157           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2158                     "popq    [rsp + #%d]",
 2159                      src_offset, dst_offset);
 2160 #endif
 2161         }
 2162       } else {
 2163         // 32-bit
 2164         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2165         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // There is no 32-bit pushl/popl in 64-bit mode, so borrow rax and
        // save it in the stack slot just below rsp:
 2167         int src_offset = ra_->reg2offset(src_first);
 2168         int dst_offset = ra_->reg2offset(dst_first);
 2169         if (masm) {
 2170           __ movq(Address(rsp, -8), rax);
 2171           __ movl(rax, Address(rsp, src_offset));
 2172           __ movl(Address(rsp, dst_offset), rax);
 2173           __ movq(rax, Address(rsp, -8));
 2174 #ifndef PRODUCT
 2175         } else {
 2176           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2177                     "movl    rax, [rsp + #%d]\n\t"
 2178                     "movl    [rsp + #%d], rax\n\t"
 2179                     "movq    rax, [rsp - #8]",
 2180                      src_offset, dst_offset);
 2181 #endif
 2182         }
 2183       }
 2184       return 0;
 2185     } else if (dst_first_rc == rc_int) {
 2186       // mem -> gpr
 2187       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2188           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2189         // 64-bit
 2190         int offset = ra_->reg2offset(src_first);
 2191         if (masm) {
 2192           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2193 #ifndef PRODUCT
 2194         } else {
 2195           st->print("movq    %s, [rsp + #%d]\t# spill",
 2196                      Matcher::regName[dst_first],
 2197                      offset);
 2198 #endif
 2199         }
 2200       } else {
 2201         // 32-bit
 2202         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2203         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2204         int offset = ra_->reg2offset(src_first);
 2205         if (masm) {
 2206           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2207 #ifndef PRODUCT
 2208         } else {
 2209           st->print("movl    %s, [rsp + #%d]\t# spill",
 2210                      Matcher::regName[dst_first],
 2211                      offset);
 2212 #endif
 2213         }
 2214       }
 2215       return 0;
 2216     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2218       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2219           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2220         // 64-bit
 2221         int offset = ra_->reg2offset(src_first);
 2222         if (masm) {
 2223           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2224 #ifndef PRODUCT
 2225         } else {
 2226           st->print("%s  %s, [rsp + #%d]\t# spill",
 2227                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2228                      Matcher::regName[dst_first],
 2229                      offset);
 2230 #endif
 2231         }
 2232       } else {
 2233         // 32-bit
 2234         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2235         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2236         int offset = ra_->reg2offset(src_first);
 2237         if (masm) {
 2238           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2239 #ifndef PRODUCT
 2240         } else {
 2241           st->print("movss   %s, [rsp + #%d]\t# spill",
 2242                      Matcher::regName[dst_first],
 2243                      offset);
 2244 #endif
 2245         }
 2246       }
 2247       return 0;
 2248     } else if (dst_first_rc == rc_kreg) {
 2249       // mem -> kreg
 2250       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2251           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2252         // 64-bit
 2253         int offset = ra_->reg2offset(src_first);
 2254         if (masm) {
 2255           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2256 #ifndef PRODUCT
 2257         } else {
 2258           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2259                      Matcher::regName[dst_first],
 2260                      offset);
 2261 #endif
 2262         }
 2263       }
 2264       return 0;
 2265     }
 2266   } else if (src_first_rc == rc_int) {
 2267     // gpr ->
 2268     if (dst_first_rc == rc_stack) {
 2269       // gpr -> mem
 2270       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2271           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2272         // 64-bit
 2273         int offset = ra_->reg2offset(dst_first);
 2274         if (masm) {
 2275           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2276 #ifndef PRODUCT
 2277         } else {
 2278           st->print("movq    [rsp + #%d], %s\t# spill",
 2279                      offset,
 2280                      Matcher::regName[src_first]);
 2281 #endif
 2282         }
 2283       } else {
 2284         // 32-bit
 2285         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2286         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2287         int offset = ra_->reg2offset(dst_first);
 2288         if (masm) {
 2289           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2290 #ifndef PRODUCT
 2291         } else {
 2292           st->print("movl    [rsp + #%d], %s\t# spill",
 2293                      offset,
 2294                      Matcher::regName[src_first]);
 2295 #endif
 2296         }
 2297       }
 2298       return 0;
 2299     } else if (dst_first_rc == rc_int) {
 2300       // gpr -> gpr
 2301       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2302           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2303         // 64-bit
 2304         if (masm) {
 2305           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2306                   as_Register(Matcher::_regEncode[src_first]));
 2307 #ifndef PRODUCT
 2308         } else {
 2309           st->print("movq    %s, %s\t# spill",
 2310                      Matcher::regName[dst_first],
 2311                      Matcher::regName[src_first]);
 2312 #endif
 2313         }
 2314         return 0;
 2315       } else {
 2316         // 32-bit
 2317         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2318         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2319         if (masm) {
 2320           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2321                   as_Register(Matcher::_regEncode[src_first]));
 2322 #ifndef PRODUCT
 2323         } else {
 2324           st->print("movl    %s, %s\t# spill",
 2325                      Matcher::regName[dst_first],
 2326                      Matcher::regName[src_first]);
 2327 #endif
 2328         }
 2329         return 0;
 2330       }
 2331     } else if (dst_first_rc == rc_float) {
 2332       // gpr -> xmm
 2333       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2334           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2335         // 64-bit
 2336         if (masm) {
 2337           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2338 #ifndef PRODUCT
 2339         } else {
 2340           st->print("movdq   %s, %s\t# spill",
 2341                      Matcher::regName[dst_first],
 2342                      Matcher::regName[src_first]);
 2343 #endif
 2344         }
 2345       } else {
 2346         // 32-bit
 2347         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2348         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2349         if (masm) {
 2350           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2351 #ifndef PRODUCT
 2352         } else {
 2353           st->print("movdl   %s, %s\t# spill",
 2354                      Matcher::regName[dst_first],
 2355                      Matcher::regName[src_first]);
 2356 #endif
 2357         }
 2358       }
 2359       return 0;
 2360     } else if (dst_first_rc == rc_kreg) {
 2361       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2362           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2363         // 64-bit
 2364         if (masm) {
 2365           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2372         }
 2373       }
 2374       Unimplemented();
 2375       return 0;
 2376     }
 2377   } else if (src_first_rc == rc_float) {
 2378     // xmm ->
 2379     if (dst_first_rc == rc_stack) {
 2380       // xmm -> mem
 2381       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2382           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2383         // 64-bit
 2384         int offset = ra_->reg2offset(dst_first);
 2385         if (masm) {
 2386           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2387 #ifndef PRODUCT
 2388         } else {
 2389           st->print("movsd   [rsp + #%d], %s\t# spill",
 2390                      offset,
 2391                      Matcher::regName[src_first]);
 2392 #endif
 2393         }
 2394       } else {
 2395         // 32-bit
 2396         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2397         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2398         int offset = ra_->reg2offset(dst_first);
 2399         if (masm) {
 2400           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2401 #ifndef PRODUCT
 2402         } else {
 2403           st->print("movss   [rsp + #%d], %s\t# spill",
 2404                      offset,
 2405                      Matcher::regName[src_first]);
 2406 #endif
 2407         }
 2408       }
 2409       return 0;
 2410     } else if (dst_first_rc == rc_int) {
 2411       // xmm -> gpr
 2412       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2413           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2414         // 64-bit
 2415         if (masm) {
 2416           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2417 #ifndef PRODUCT
 2418         } else {
 2419           st->print("movdq   %s, %s\t# spill",
 2420                      Matcher::regName[dst_first],
 2421                      Matcher::regName[src_first]);
 2422 #endif
 2423         }
 2424       } else {
 2425         // 32-bit
 2426         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2427         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2428         if (masm) {
 2429           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2430 #ifndef PRODUCT
 2431         } else {
 2432           st->print("movdl   %s, %s\t# spill",
 2433                      Matcher::regName[dst_first],
 2434                      Matcher::regName[src_first]);
 2435 #endif
 2436         }
 2437       }
 2438       return 0;
 2439     } else if (dst_first_rc == rc_float) {
 2440       // xmm -> xmm
 2441       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2442           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2443         // 64-bit
 2444         if (masm) {
 2445           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2446 #ifndef PRODUCT
 2447         } else {
 2448           st->print("%s  %s, %s\t# spill",
 2449                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2450                      Matcher::regName[dst_first],
 2451                      Matcher::regName[src_first]);
 2452 #endif
 2453         }
 2454       } else {
 2455         // 32-bit
 2456         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2457         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2458         if (masm) {
 2459           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2460 #ifndef PRODUCT
 2461         } else {
 2462           st->print("%s  %s, %s\t# spill",
 2463                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2464                      Matcher::regName[dst_first],
 2465                      Matcher::regName[src_first]);
 2466 #endif
 2467         }
 2468       }
 2469       return 0;
 2470     } else if (dst_first_rc == rc_kreg) {
 2471       assert(false, "Illegal spilling");
 2472       return 0;
 2473     }
 2474   } else if (src_first_rc == rc_kreg) {
 2475     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2477       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2478           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2479         // 64-bit
 2480         int offset = ra_->reg2offset(dst_first);
 2481         if (masm) {
 2482           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2483 #ifndef PRODUCT
 2484         } else {
 2485           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2486                      offset,
 2487                      Matcher::regName[src_first]);
 2488 #endif
 2489         }
 2490       }
 2491       return 0;
 2492     } else if (dst_first_rc == rc_int) {
 2493       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2494           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2495         // 64-bit
 2496         if (masm) {
 2497           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2498 #ifndef PRODUCT
 2499         } else {
 2500          st->print("kmovq   %s, %s\t# spill",
 2501                      Matcher::regName[dst_first],
 2502                      Matcher::regName[src_first]);
 2503 #endif
 2504         }
 2505       }
 2506       Unimplemented();
 2507       return 0;
 2508     } else if (dst_first_rc == rc_kreg) {
 2509       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2510           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2511         // 64-bit
 2512         if (masm) {
 2513           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2514 #ifndef PRODUCT
 2515         } else {
 2516          st->print("kmovq   %s, %s\t# spill",
 2517                      Matcher::regName[dst_first],
 2518                      Matcher::regName[src_first]);
 2519 #endif
 2520         }
 2521       }
 2522       return 0;
 2523     } else if (dst_first_rc == rc_float) {
 2524       assert(false, "Illegal spill");
 2525       return 0;
 2526     }
 2527   }
 2528 
 2529   assert(0," foo ");
 2530   Unimplemented();
 2531   return 0;
 2532 }
 2533 
 2534 #ifndef PRODUCT
 2535 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2536   implementation(nullptr, ra_, false, st);
 2537 }
 2538 #endif
 2539 
 2540 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2541   implementation(masm, ra_, false, nullptr);
 2542 }
 2543 
 2544 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2545   return MachNode::size(ra_);
 2546 }
 2547 
 2548 //=============================================================================
 2549 #ifndef PRODUCT
 2550 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2551 {
 2552   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2553   int reg = ra_->get_reg_first(this);
 2554   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2555             Matcher::regName[reg], offset);
 2556 }
 2557 #endif
 2558 
 2559 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2560 {
 2561   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2562   int reg = ra_->get_encode(this);
 2563 
 2564   __ lea(as_Register(reg), Address(rsp, offset));
 2565 }
 2566 
 2567 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2568 {
 2569   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
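  // Size breakdown (a sketch of the standard lea encoding): prefix (1-byte
  // REX or 2-byte REX2) + opcode + ModRM + SIB, followed by a 1-byte disp8
  // when offset < 0x80, else a 4-byte disp32.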
 2570   if (ra_->get_encode(this) > 15) {
 2571     return (offset < 0x80) ? 6 : 9; // REX2
 2572   } else {
 2573     return (offset < 0x80) ? 5 : 8; // REX
 2574   }
 2575 }
 2576 
 2577 //=============================================================================
 2578 #ifndef PRODUCT
 2579 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2580 {
 2581   st->print_cr("MachVEPNode");
 2582 }
 2583 #endif
 2584 
 2585 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   CodeBuffer* cbuf = masm->code();
 2588   uint insts_size = cbuf->insts_size();
 2589   if (!_verified) {
 2590     __ ic_check(1);
 2591   } else {
 2592     // TODO 8284443 Avoid creation of temporary frame
 2593     if (ra_->C->stub_function() == nullptr) {
 2594       __ verified_entry(ra_->C, 0);
 2595       __ entry_barrier();
 2596       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2597       __ remove_frame(initial_framesize, false);
 2598     }
 2599     // Unpack inline type args passed as oop and then jump to
 2600     // the verified entry point (skipping the unverified entry).
 2601     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2602     // Emit code for verified entry and save increment for stack repair on return
 2603     __ verified_entry(ra_->C, sp_inc);
 2604     if (Compile::current()->output()->in_scratch_emit_size()) {
 2605       Label dummy_verified_entry;
 2606       __ jmp(dummy_verified_entry);
 2607     } else {
 2608       __ jmp(*_verified_entry);
 2609     }
 2610   }
  /* WARNING these NOPs are critical so that the verified entry point is
     properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2613   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2614   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2615   if (nops_cnt > 0) {
 2616     __ nop(nops_cnt);
 2617   }
 2618 }
 2619 
 2620 //=============================================================================
 2621 #ifndef PRODUCT
 2622 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2623 {
 2624   if (UseCompressedClassPointers) {
 2625     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2626     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2627   } else {
 2628     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2629     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2630   }
 2631   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2632 }
 2633 #endif
 2634 
 2635 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2636 {
 2637   __ ic_check(InteriorEntryAlignment);
 2638 }
 2639 
 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2649 }
 2650 
 2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2653 }
 2654 
 2655 #ifdef ASSERT
 2656 static bool is_ndd_demotable(const MachNode* mdef) {
 2657   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2658 }
 2659 #endif
 2660 
 2661 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2662                                             int oper_index) {
 2663   if (mdef == nullptr) {
 2664     return false;
 2665   }
 2666 
 2667   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2668       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2669     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2670     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2671     return false;
 2672   }
 2673 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any single address component
  // will not result in NDD demotion by the assembler.
 2677   if (mdef->operand_num_edges(oper_index) != 1) {
 2678     return false;
 2679   }
 2680 
  // A demotion candidate must be register-mask compatible with the definition.
 2682   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2683   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2684     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2685     return false;
 2686   }
 2687 
 2688   switch (oper_index) {
  // The first operand of a MachNode matched by an Intel APX NDD selection
  // pattern can share its assigned register with the definition operand if
  // their live ranges do not overlap. In such a scenario we can demote the
  // instruction to a legacy map0/map1 encoding by replacing its 4-byte
  // extended EVEX prefix with a shorter REX/REX2 encoding. Demotion
  // candidates are decorated with a special flag by the instruction selector.
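  //
  // Illustrative sketch (register names are hypothetical): if the allocator
  // assigns the definition and the first source the same register, an NDD
  // form such as
  //     addl rax, rax, rbx   // rax = rax + rbx, 4-byte extended EVEX prefix
  // can be demoted by the assembler to the legacy two-operand
  //     addl rax, rbx        // shorter REX/REX2 encoding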
 2695   case 1:
 2696     return is_ndd_demotable_opr1(mdef);
 2697 
  // The definition operand of a commutative operation can be biased towards
  // the second operand.
 2700   case 2:
 2701     return is_ndd_demotable_opr2(mdef);
 2702 
  // The current scheme only selects up to two biasing candidates.
 2704   default:
 2705     assert(false, "unhandled operand index: %s", mdef->Name());
 2706     break;
 2707   }
 2708 
 2709   return false;
 2710 }
 2711 
 2712 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2713   assert(EnableVectorSupport, "sanity");
 2714   int lo = XMM0_num;
 2715   int hi = XMM0b_num;
 2716   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2717   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2718   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2719   return OptoRegPair(hi, lo);
 2720 }
 2721 
 2722 // Is this branch offset short enough that a short branch can be used?
 2723 //
 2724 // NOTE: If the platform does not provide any short branch variants, then
 2725 //       this method should return false for offset 0.
 2726 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86, a branch displacement is calculated relative to the address
  // of the next instruction.
 2730   offset -= br_size;
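  // Worked example: a 2-byte short branch whose target lies 129 bytes past
  // the branch address adjusts to 129 - 2 = 127, which still fits the
  // signed 8-bit displacement range [-128, 127].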
 2731 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2734   if (rule == jmpConUCF2_rule)
 2735     return (-126 <= offset && offset <= 125);
 2736   return (-128 <= offset && offset <= 127);
 2737 }
 2738 
 2739 // Return whether or not this register is ever used as an argument.
 2740 // This function is used on startup to build the trampoline stubs in
 2741 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
 2743 // available to the callee.
 2744 bool Matcher::can_be_java_arg(int reg)
 2745 {
 2746   return
 2747     reg ==  RDI_num || reg == RDI_H_num ||
 2748     reg ==  RSI_num || reg == RSI_H_num ||
 2749     reg ==  RDX_num || reg == RDX_H_num ||
 2750     reg ==  RCX_num || reg == RCX_H_num ||
 2751     reg ==   R8_num || reg ==  R8_H_num ||
 2752     reg ==   R9_num || reg ==  R9_H_num ||
 2753     reg ==  R12_num || reg == R12_H_num ||
 2754     reg == XMM0_num || reg == XMM0b_num ||
 2755     reg == XMM1_num || reg == XMM1b_num ||
 2756     reg == XMM2_num || reg == XMM2b_num ||
 2757     reg == XMM3_num || reg == XMM3b_num ||
 2758     reg == XMM4_num || reg == XMM4b_num ||
 2759     reg == XMM5_num || reg == XMM5b_num ||
 2760     reg == XMM6_num || reg == XMM6b_num ||
 2761     reg == XMM7_num || reg == XMM7b_num;
 2762 }
 2763 
 2764 bool Matcher::is_spillable_arg(int reg)
 2765 {
 2766   return can_be_java_arg(reg);
 2767 }
 2768 
 2769 uint Matcher::int_pressure_limit()
 2770 {
 2771   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2772 }
 2773 
 2774 uint Matcher::float_pressure_limit()
 2775 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2778   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2779   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2780   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2781 }
 2782 
 2783 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply (MulHiL) when the divisor
  // is a constant is faster than the hardware DIV instruction, so never
  // fall back to the assembly stub.
 2787   return false;
 2788 }
 2789 
 2790 // Register for DIVI projection of divmodI
 2791 const RegMask& Matcher::divI_proj_mask() {
 2792   return INT_RAX_REG_mask();
 2793 }
 2794 
 2795 // Register for MODI projection of divmodI
 2796 const RegMask& Matcher::modI_proj_mask() {
 2797   return INT_RDX_REG_mask();
 2798 }
 2799 
 2800 // Register for DIVL projection of divmodL
 2801 const RegMask& Matcher::divL_proj_mask() {
 2802   return LONG_RAX_REG_mask();
 2803 }
 2804 
 2805 // Register for MODL projection of divmodL
 2806 const RegMask& Matcher::modL_proj_mask() {
 2807   return LONG_RDX_REG_mask();
 2808 }
 2809 
 2810 %}
 2811 
 2812 source_hpp %{
 2813 // Header information of the source block.
 2814 // Method declarations/definitions which are used outside
 2815 // the ad-scope can conveniently be defined here.
 2816 //
 2817 // To keep related declarations/definitions/uses close together,
 2818 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2819 
 2820 #include "runtime/vm_version.hpp"
 2821 
 2822 class NativeJump;
 2823 
 2824 class CallStubImpl {
 2825 
 2826   //--------------------------------------------------------------
 2827   //---<  Used for optimization in Compile::shorten_branches  >---
 2828   //--------------------------------------------------------------
 2829 
 2830  public:
 2831   // Size of call trampoline stub.
 2832   static uint size_call_trampoline() {
 2833     return 0; // no call trampolines on this platform
 2834   }
 2835 
 2836   // number of relocations needed by a call trampoline stub
 2837   static uint reloc_call_trampoline() {
 2838     return 0; // no call trampolines on this platform
 2839   }
 2840 };
 2841 
 2842 class HandlerImpl {
 2843 
 2844  public:
 2845 
 2846   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2847 
 2848   static uint size_deopt_handler() {
    // One 5-byte call plus one 2-byte short jmp.
    return 7;
 2851   }
 2852 };
 2853 
 2854 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2856     case  4: // fall-through
 2857     case  8: // fall-through
 2858     case 16: return Assembler::AVX_128bit;
 2859     case 32: return Assembler::AVX_256bit;
 2860     case 64: return Assembler::AVX_512bit;
 2861 
 2862     default: {
 2863       ShouldNotReachHere();
 2864       return Assembler::AVX_NoVec;
 2865     }
 2866   }
 2867 }
 2868 
 2869 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2870   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2871 }
 2872 
 2873 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2874   uint def_idx = use->operand_index(opnd);
 2875   Node* def = use->in(def_idx);
 2876   return vector_length_encoding(def);
 2877 }
 2878 
 2879 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2880   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2881          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2882 }
 2883 
 2884 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2885   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2886            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2887 }
 2888 
 2889 class Node::PD {
 2890 public:
 2891   enum NodeFlags : uint64_t {
 2892     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2893     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2894     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2895     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2896     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2897     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2898     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2899     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2900     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2901     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2902     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2903     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2904     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2905     _last_flag                = Flag_ndd_demotable_opr2
 2906   };
 2907 };
 2908 
 2909 %} // end source_hpp
 2910 
 2911 source %{
 2912 
 2913 #include "opto/addnode.hpp"
 2914 #include "c2_intelJccErratum_x86.hpp"
 2915 
 2916 void PhaseOutput::pd_perform_mach_node_analysis() {
 2917   if (VM_Version::has_intel_jcc_erratum()) {
 2918     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2919     _buf_sizes._code += extra_padding;
 2920   }
 2921 }
 2922 
 2923 int MachNode::pd_alignment_required() const {
 2924   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2925     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2926     return IntelJccErratum::largest_jcc_size() + 1;
 2927   } else {
 2928     return 1;
 2929   }
 2930 }
 2931 
 2932 int MachNode::compute_padding(int current_offset) const {
 2933   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2934     Compile* C = Compile::current();
 2935     PhaseOutput* output = C->output();
 2936     Block* block = output->block();
 2937     int index = output->index();
 2938     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2939   } else {
 2940     return 0;
 2941   }
 2942 }
 2943 
 2944 // Emit deopt handler code.
 2945 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2946 
 2947   // Note that the code buffer's insts_mark is always relative to insts.
 2948   // That's why we must use the macroassembler to generate a handler.
 2949   address base = __ start_a_stub(size_deopt_handler());
 2950   if (base == nullptr) {
 2951     ciEnv::current()->record_failure("CodeCache is full");
 2952     return 0;  // CodeBuffer::expand failed
 2953   }
 2954   int offset = __ offset();
 2955 
 2956   Label start;
 2957   __ bind(start);
 2958 
 2959   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2960 
 2961   int entry_offset = __ offset();
 2962 
 2963   __ jmp(start);
 2964 
 2965   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2966   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2967          "out of bounds read in post-call NOP check");
 2968   __ end_a_stub();
 2969   return entry_offset;
 2970 }
 2971 
 2972 static Assembler::Width widthForType(BasicType bt) {
 2973   if (bt == T_BYTE) {
 2974     return Assembler::B;
 2975   } else if (bt == T_SHORT) {
 2976     return Assembler::W;
 2977   } else if (bt == T_INT) {
 2978     return Assembler::D;
 2979   } else {
 2980     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2981     return Assembler::Q;
 2982   }
 2983 }
 2984 
 2985 //=============================================================================
 2986 
 2987   // Float masks come from different places depending on platform.
 2988   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2989   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2990   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2991   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2992   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2993   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2994   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2995   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2996   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2997   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2998   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2999   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 3000   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 3001   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 3002   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 3003   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 3004   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
  static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
  static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
 3007 
 3008 //=============================================================================
 3009 bool Matcher::match_rule_supported(int opcode) {
 3010   if (!has_match_rule(opcode)) {
 3011     return false; // no match rule present
 3012   }
 3013   switch (opcode) {
 3014     case Op_AbsVL:
 3015     case Op_StoreVectorScatter:
 3016       if (UseAVX < 3) {
 3017         return false;
 3018       }
 3019       break;
 3020     case Op_PopCountI:
 3021     case Op_PopCountL:
 3022       if (!UsePopCountInstruction) {
 3023         return false;
 3024       }
 3025       break;
 3026     case Op_PopCountVI:
 3027       if (UseAVX < 2) {
 3028         return false;
 3029       }
 3030       break;
 3031     case Op_CompressV:
 3032     case Op_ExpandV:
 3033     case Op_PopCountVL:
 3034       if (UseAVX < 2) {
 3035         return false;
 3036       }
 3037       break;
 3038     case Op_MulVI:
 3039       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3040         return false;
 3041       }
 3042       break;
 3043     case Op_MulVL:
 3044       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3045         return false;
 3046       }
 3047       break;
 3048     case Op_MulReductionVL:
 3049       if (VM_Version::supports_avx512dq() == false) {
 3050         return false;
 3051       }
 3052       break;
 3053     case Op_AbsVB:
 3054     case Op_AbsVS:
 3055     case Op_AbsVI:
 3056     case Op_AddReductionVI:
 3057     case Op_AndReductionV:
 3058     case Op_OrReductionV:
 3059     case Op_XorReductionV:
 3060       if (UseSSE < 3) { // requires at least SSSE3
 3061         return false;
 3062       }
 3063       break;
 3064     case Op_MaxHF:
 3065     case Op_MinHF:
 3066       if (!VM_Version::supports_avx512vlbw()) {
 3067         return false;
 3068       }  // fallthrough
 3069     case Op_AddHF:
 3070     case Op_DivHF:
 3071     case Op_FmaHF:
 3072     case Op_MulHF:
 3073     case Op_ReinterpretS2HF:
 3074     case Op_ReinterpretHF2S:
 3075     case Op_SubHF:
 3076     case Op_SqrtHF:
 3077       if (!VM_Version::supports_avx512_fp16()) {
 3078         return false;
 3079       }
 3080       break;
 3081     case Op_VectorLoadShuffle:
 3082     case Op_VectorRearrange:
 3083     case Op_MulReductionVI:
 3084       if (UseSSE < 4) { // requires at least SSE4
 3085         return false;
 3086       }
 3087       break;
 3088     case Op_IsInfiniteF:
 3089     case Op_IsInfiniteD:
 3090       if (!VM_Version::supports_avx512dq()) {
 3091         return false;
 3092       }
 3093       break;
 3094     case Op_SqrtVD:
 3095     case Op_SqrtVF:
 3096     case Op_VectorMaskCmp:
 3097     case Op_VectorCastB2X:
 3098     case Op_VectorCastS2X:
 3099     case Op_VectorCastI2X:
 3100     case Op_VectorCastL2X:
 3101     case Op_VectorCastF2X:
 3102     case Op_VectorCastD2X:
 3103     case Op_VectorUCastB2X:
 3104     case Op_VectorUCastS2X:
 3105     case Op_VectorUCastI2X:
 3106     case Op_VectorMaskCast:
 3107       if (UseAVX < 1) { // enabled for AVX only
 3108         return false;
 3109       }
 3110       break;
 3111     case Op_PopulateIndex:
 3112       if (UseAVX < 2) {
 3113         return false;
 3114       }
 3115       break;
 3116     case Op_RoundVF:
 3117       if (UseAVX < 2) { // enabled for AVX2 only
 3118         return false;
 3119       }
 3120       break;
 3121     case Op_RoundVD:
 3122       if (UseAVX < 3) {
 3123         return false;  // enabled for AVX3 only
 3124       }
 3125       break;
 3126     case Op_CompareAndSwapL:
 3127     case Op_CompareAndSwapP:
 3128       break;
 3129     case Op_StrIndexOf:
 3130       if (!UseSSE42Intrinsics) {
 3131         return false;
 3132       }
 3133       break;
 3134     case Op_StrIndexOfChar:
 3135       if (!UseSSE42Intrinsics) {
 3136         return false;
 3137       }
 3138       break;
 3139     case Op_OnSpinWait:
 3140       if (VM_Version::supports_on_spin_wait() == false) {
 3141         return false;
 3142       }
 3143       break;
 3144     case Op_MulVB:
 3145     case Op_LShiftVB:
 3146     case Op_RShiftVB:
 3147     case Op_URShiftVB:
 3148     case Op_VectorInsert:
 3149     case Op_VectorLoadMask:
 3150     case Op_VectorStoreMask:
 3151     case Op_VectorBlend:
 3152       if (UseSSE < 4) {
 3153         return false;
 3154       }
 3155       break;
 3156     case Op_MaxD:
 3157     case Op_MaxF:
 3158     case Op_MinD:
 3159     case Op_MinF:
 3160       if (UseAVX < 1) { // enabled for AVX only
 3161         return false;
 3162       }
 3163       break;
 3164     case Op_CacheWB:
 3165     case Op_CacheWBPreSync:
 3166     case Op_CacheWBPostSync:
 3167       if (!VM_Version::supports_data_cache_line_flush()) {
 3168         return false;
 3169       }
 3170       break;
 3171     case Op_ExtractB:
 3172     case Op_ExtractL:
 3173     case Op_ExtractI:
 3174     case Op_RoundDoubleMode:
 3175       if (UseSSE < 4) {
 3176         return false;
 3177       }
 3178       break;
 3179     case Op_RoundDoubleModeV:
 3180       if (VM_Version::supports_avx() == false) {
 3181         return false; // 128bit vroundpd is not available
 3182       }
 3183       break;
 3184     case Op_LoadVectorGather:
 3185     case Op_LoadVectorGatherMasked:
 3186       if (UseAVX < 2) {
 3187         return false;
 3188       }
 3189       break;
 3190     case Op_FmaF:
 3191     case Op_FmaD:
 3192     case Op_FmaVD:
 3193     case Op_FmaVF:
 3194       if (!UseFMA) {
 3195         return false;
 3196       }
 3197       break;
 3198     case Op_MacroLogicV:
 3199       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3200         return false;
 3201       }
 3202       break;
 3203 
 3204     case Op_VectorCmpMasked:
 3205     case Op_VectorMaskGen:
 3206       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3207         return false;
 3208       }
 3209       break;
 3210     case Op_VectorMaskFirstTrue:
 3211     case Op_VectorMaskLastTrue:
 3212     case Op_VectorMaskTrueCount:
 3213     case Op_VectorMaskToLong:
 3214       if (UseAVX < 1) {
 3215          return false;
 3216       }
 3217       break;
 3218     case Op_RoundF:
 3219     case Op_RoundD:
 3220       break;
 3221     case Op_CopySignD:
 3222     case Op_CopySignF:
 3223       if (UseAVX < 3)  {
 3224         return false;
 3225       }
 3226       if (!VM_Version::supports_avx512vl()) {
 3227         return false;
 3228       }
 3229       break;
 3230     case Op_CompressBits:
 3231     case Op_ExpandBits:
 3232       if (!VM_Version::supports_bmi2()) {
 3233         return false;
 3234       }
 3235       break;
 3236     case Op_CompressM:
 3237       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3238         return false;
 3239       }
 3240       break;
 3241     case Op_ConvF2HF:
 3242     case Op_ConvHF2F:
 3243       if (!VM_Version::supports_float16()) {
 3244         return false;
 3245       }
 3246       break;
 3247     case Op_VectorCastF2HF:
 3248     case Op_VectorCastHF2F:
 3249       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3250         return false;
 3251       }
 3252       break;
 3253   }
 3254   return true;  // Match rules are supported by default.
 3255 }
 3256 
 3257 //------------------------------------------------------------------------
 3258 
 3259 static inline bool is_pop_count_instr_target(BasicType bt) {
 3260   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3261          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3262 }
 3263 
 3264 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3265   return match_rule_supported_vector(opcode, vlen, bt);
 3266 }
 3267 
 3268 // Identify extra cases that we might want to provide match rules for vector nodes and
 3269 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3270 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3271   if (!match_rule_supported(opcode)) {
 3272     return false;
 3273   }
 3274   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3275   //   * SSE2 supports 128bit vectors for all types;
 3276   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3277   //   * AVX2 supports 256bit vectors for all types;
 3278   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3279   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3280   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3281   // And MaxVectorSize is taken into account as well.
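  // For example (illustrative): with only AVX1, a 256-bit vector of ints
  // (bt == T_INT, vlen == 8) is rejected here, while a 256-bit vector of
  // doubles (bt == T_DOUBLE, vlen == 4) passes the size check.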
 3282   if (!vector_size_supported(bt, vlen)) {
 3283     return false;
 3284   }
 3285   // Special cases which require vector length follow:
 3286   //   * implementation limitations
 3287   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3288   //   * 128bit vroundpd instruction is present only in AVX1
 3289   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3290   switch (opcode) {
 3291     case Op_MaxVHF:
 3292     case Op_MinVHF:
 3293       if (!VM_Version::supports_avx512bw()) {
 3294         return false;
 3295       }
 3296     case Op_AddVHF:
 3297     case Op_DivVHF:
 3298     case Op_FmaVHF:
 3299     case Op_MulVHF:
 3300     case Op_SubVHF:
 3301     case Op_SqrtVHF:
 3302       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3303         return false;
 3304       }
 3305       if (!VM_Version::supports_avx512_fp16()) {
 3306         return false;
 3307       }
 3308       break;
 3309     case Op_AbsVF:
 3310     case Op_NegVF:
 3311       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3312         return false; // 512bit vandps and vxorps are not available
 3313       }
 3314       break;
 3315     case Op_AbsVD:
 3316     case Op_NegVD:
 3317       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3318         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3319       }
 3320       break;
 3321     case Op_RotateRightV:
 3322     case Op_RotateLeftV:
 3323       if (bt != T_INT && bt != T_LONG) {
 3324         return false;
 3325       } // fallthrough
 3326     case Op_MacroLogicV:
 3327       if (!VM_Version::supports_evex() ||
 3328           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3329         return false;
 3330       }
 3331       break;
 3332     case Op_ClearArray:
 3333     case Op_VectorMaskGen:
 3334     case Op_VectorCmpMasked:
 3335       if (!VM_Version::supports_avx512bw()) {
 3336         return false;
 3337       }
 3338       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3339         return false;
 3340       }
 3341       break;
 3342     case Op_LoadVectorMasked:
 3343     case Op_StoreVectorMasked:
 3344       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3345         return false;
 3346       }
 3347       break;
 3348     case Op_UMinV:
 3349     case Op_UMaxV:
 3350       if (UseAVX == 0) {
 3351         return false;
 3352       }
 3353       break;
 3354     case Op_MaxV:
 3355     case Op_MinV:
 3356       if (UseSSE < 4 && is_integral_type(bt)) {
 3357         return false;
 3358       }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
        // Float/Double intrinsics are enabled for AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512-bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
 3368       break;
 3369     case Op_CallLeafVector:
 3370       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3371         return false;
 3372       }
 3373       break;
 3374     case Op_AddReductionVI:
 3375       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3376         return false;
 3377       }
 3378       // fallthrough
 3379     case Op_AndReductionV:
 3380     case Op_OrReductionV:
 3381     case Op_XorReductionV:
 3382       if (is_subword_type(bt) && (UseSSE < 4)) {
 3383         return false;
 3384       }
 3385       break;
 3386     case Op_MinReductionV:
 3387     case Op_MaxReductionV:
 3388       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3389         return false;
 3390       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3391         return false;
 3392       }
 3393       // Float/Double intrinsics enabled for AVX family.
 3394       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3395         return false;
 3396       }
 3397       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3398         return false;
 3399       }
 3400       break;
 3401     case Op_VectorBlend:
 3402       if (UseAVX == 0 && size_in_bits < 128) {
 3403         return false;
 3404       }
 3405       break;
 3406     case Op_VectorTest:
 3407       if (UseSSE < 4) {
 3408         return false; // Implementation limitation
 3409       } else if (size_in_bits < 32) {
 3410         return false; // Implementation limitation
 3411       }
 3412       break;
 3413     case Op_VectorLoadShuffle:
 3414     case Op_VectorRearrange:
      if (vlen == 2) {
 3416         return false; // Implementation limitation due to how shuffle is loaded
 3417       } else if (size_in_bits == 256 && UseAVX < 2) {
 3418         return false; // Implementation limitation
 3419       }
 3420       break;
 3421     case Op_VectorLoadMask:
 3422     case Op_VectorMaskCast:
 3423       if (size_in_bits == 256 && UseAVX < 2) {
 3424         return false; // Implementation limitation
 3425       }
 3426       // fallthrough
 3427     case Op_VectorStoreMask:
 3428       if (vlen == 2) {
 3429         return false; // Implementation limitation
 3430       }
 3431       break;
 3432     case Op_PopulateIndex:
 3433       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3434         return false;
 3435       }
 3436       break;
 3437     case Op_VectorCastB2X:
 3438     case Op_VectorCastS2X:
 3439     case Op_VectorCastI2X:
 3440       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3441         return false;
 3442       }
 3443       break;
 3444     case Op_VectorCastL2X:
 3445       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3446         return false;
 3447       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3448         return false;
 3449       }
 3450       break;
 3451     case Op_VectorCastF2X: {
 3452         // As per JLS section 5.1.3 narrowing conversion to sub-word types
 3453         // happen after intermediate conversion to integer and special handling
 3454         // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
 3455         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3456         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3457           return false;
 3458         }
 3459       }
 3460       // fallthrough
 3461     case Op_VectorCastD2X:
 3462       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3463         return false;
 3464       }
 3465       break;
 3466     case Op_VectorCastF2HF:
 3467     case Op_VectorCastHF2F:
 3468       if (!VM_Version::supports_f16c() &&
 3469          ((!VM_Version::supports_evex() ||
 3470          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3471         return false;
 3472       }
 3473       break;
 3474     case Op_RoundVD:
 3475       if (!VM_Version::supports_avx512dq()) {
 3476         return false;
 3477       }
 3478       break;
 3479     case Op_MulReductionVI:
 3480       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3481         return false;
 3482       }
 3483       break;
 3484     case Op_LoadVectorGatherMasked:
 3485       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3486         return false;
 3487       }
 3488       if (is_subword_type(bt) &&
 3489          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3490           (size_in_bits < 64)                                      ||
 3491           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3492         return false;
 3493       }
 3494       break;
 3495     case Op_StoreVectorScatterMasked:
 3496     case Op_StoreVectorScatter:
 3497       if (is_subword_type(bt)) {
 3498         return false;
 3499       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3500         return false;
 3501       }
 3502       // fallthrough
 3503     case Op_LoadVectorGather:
 3504       if (!is_subword_type(bt) && size_in_bits == 64) {
 3505         return false;
 3506       }
 3507       if (is_subword_type(bt) && size_in_bits < 64) {
 3508         return false;
 3509       }
 3510       break;
 3511     case Op_SaturatingAddV:
 3512     case Op_SaturatingSubV:
 3513       if (UseAVX < 1) {
 3514         return false; // Implementation limitation
 3515       }
 3516       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3517         return false;
 3518       }
 3519       break;
 3520     case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
 3537     case Op_MaskAll:
 3538       if (!VM_Version::supports_evex()) {
 3539         return false;
 3540       }
 3541       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3542         return false;
 3543       }
 3544       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3545         return false;
 3546       }
 3547       break;
 3548     case Op_VectorMaskCmp:
 3549       if (vlen < 2 || size_in_bits < 32) {
 3550         return false;
 3551       }
 3552       break;
 3553     case Op_CompressM:
 3554       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3555         return false;
 3556       }
 3557       break;
 3558     case Op_CompressV:
 3559     case Op_ExpandV:
 3560       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3561         return false;
 3562       }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
 3566     case Op_VectorLongToMask:
 3567       if (UseAVX < 1) {
 3568         return false;
 3569       }
 3570       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3571         return false;
 3572       }
 3573       break;
 3574     case Op_SignumVD:
 3575     case Op_SignumVF:
 3576       if (UseAVX < 1) {
 3577         return false;
 3578       }
 3579       break;
 3580     case Op_PopCountVI:
 3581     case Op_PopCountVL: {
 3582         if (!is_pop_count_instr_target(bt) &&
 3583             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3584           return false;
 3585         }
 3586       }
 3587       break;
 3588     case Op_ReverseV:
 3589     case Op_ReverseBytesV:
 3590       if (UseAVX < 2) {
 3591         return false;
 3592       }
 3593       break;
 3594     case Op_CountTrailingZerosV:
 3595     case Op_CountLeadingZerosV:
 3596       if (UseAVX < 2) {
 3597         return false;
 3598       }
 3599       break;
 3600   }
 3601   return true;  // Per default match rules are supported.
 3602 }
 3603 
 3604 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3605   // ADLC based match_rule_supported routine checks for the existence of pattern based
 3606   // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
 3607   // of their non-masked counterpart with mask edge being the differentiator.
 3608   // This routine does a strict check on the existence of masked operation patterns
 3609   // by returning a default false value for all the other opcodes apart from the
 3610   // ones whose masked instruction patterns are defined in this file.
 3611   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3612     return false;
 3613   }
 3614 
 3615   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3616   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3617     return false;
 3618   }
  switch (opcode) {
 3620     // Unary masked operations
 3621     case Op_AbsVB:
 3622     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      // fallthrough
 3626     case Op_AbsVI:
 3627     case Op_AbsVL:
 3628       return true;
 3629 
 3630     // Ternary masked operations
 3631     case Op_FmaVF:
 3632     case Op_FmaVD:
 3633       return true;
 3634 
 3635     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
        return false;
      }
 3639       return true;
 3640 
 3641     // Binary masked operations
 3642     case Op_AddVB:
 3643     case Op_AddVS:
 3644     case Op_SubVB:
 3645     case Op_SubVS:
 3646     case Op_MulVS:
 3647     case Op_LShiftVS:
 3648     case Op_RShiftVS:
 3649     case Op_URShiftVS:
 3650       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3651       if (!VM_Version::supports_avx512bw()) {
 3652         return false;  // Implementation limitation
 3653       }
 3654       return true;
 3655 
 3656     case Op_MulVL:
 3657       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3658       if (!VM_Version::supports_avx512dq()) {
 3659         return false;  // Implementation limitation
 3660       }
 3661       return true;
 3662 
 3663     case Op_AndV:
 3664     case Op_OrV:
 3665     case Op_XorV:
 3666     case Op_RotateRightV:
 3667     case Op_RotateLeftV:
 3668       if (bt != T_INT && bt != T_LONG) {
 3669         return false; // Implementation limitation
 3670       }
 3671       return true;
 3672 
 3673     case Op_VectorLoadMask:
 3674       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3675       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3676         return false;
 3677       }
 3678       return true;
 3679 
 3680     case Op_AddVI:
 3681     case Op_AddVL:
 3682     case Op_AddVF:
 3683     case Op_AddVD:
 3684     case Op_SubVI:
 3685     case Op_SubVL:
 3686     case Op_SubVF:
 3687     case Op_SubVD:
 3688     case Op_MulVI:
 3689     case Op_MulVF:
 3690     case Op_MulVD:
 3691     case Op_DivVF:
 3692     case Op_DivVD:
 3693     case Op_SqrtVF:
 3694     case Op_SqrtVD:
 3695     case Op_LShiftVI:
 3696     case Op_LShiftVL:
 3697     case Op_RShiftVI:
 3698     case Op_RShiftVL:
 3699     case Op_URShiftVI:
 3700     case Op_URShiftVL:
 3701     case Op_LoadVectorMasked:
 3702     case Op_StoreVectorMasked:
 3703     case Op_LoadVectorGatherMasked:
 3704     case Op_StoreVectorScatterMasked:
 3705       return true;
 3706 
 3707     case Op_UMinV:
 3708     case Op_UMaxV:
 3709       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3710         return false;
 3711       } // fallthrough
 3712     case Op_MaxV:
 3713     case Op_MinV:
 3714       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3715         return false; // Implementation limitation
 3716       }
 3717       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3718         return false; // Implementation limitation
 3719       }
 3720       return true;
 3721     case Op_SaturatingAddV:
 3722     case Op_SaturatingSubV:
 3723       if (!is_subword_type(bt)) {
 3724         return false;
 3725       }
 3726       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3727         return false; // Implementation limitation
 3728       }
 3729       return true;
 3730 
 3731     case Op_VectorMaskCmp:
 3732       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3733         return false; // Implementation limitation
 3734       }
 3735       return true;
 3736 
 3737     case Op_VectorRearrange:
 3738       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3739         return false; // Implementation limitation
 3740       }
 3741       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3742         return false; // Implementation limitation
 3743       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3744         return false; // Implementation limitation
 3745       }
 3746       return true;
 3747 
 3748     // Binary Logical operations
 3749     case Op_AndVMask:
 3750     case Op_OrVMask:
 3751     case Op_XorVMask:
 3752       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3753         return false; // Implementation limitation
 3754       }
 3755       return true;
 3756 
 3757     case Op_PopCountVI:
 3758     case Op_PopCountVL:
 3759       if (!is_pop_count_instr_target(bt)) {
 3760         return false;
 3761       }
 3762       return true;
 3763 
 3764     case Op_MaskAll:
 3765       return true;
 3766 
 3767     case Op_CountLeadingZerosV:
 3768       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3769         return true;
 3770       } // fallthrough
 3771     default:
 3772       return false;
 3773   }
 3774 }
 3775 
 3776 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3777   return false;
 3778 }
 3779 
 3780 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3781 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3782   switch (elem_bt) {
 3783     case T_BYTE:  return false;
 3784     case T_SHORT: return !VM_Version::supports_avx512bw();
 3785     case T_INT:   return !VM_Version::supports_avx();
 3786     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3787     default:
 3788       ShouldNotReachHere();
 3789       return false;
 3790   }
 3791 }
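      // A sketch of the rationale (assuming the usual instruction selection):
      // byte shuffles feed pshufb directly; short shuffles need avx512bw
      // (vpermw); int shuffles need AVX (vpermilps/vpermd); long shuffles
      // narrower than 512 bits need avx512vl for a vector-controlled vpermq.
      // Otherwise the shuffle must first be prepared via a VectorLoadShuffle.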
 3792 
 3793 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3794   // Prefer predicate if the mask type is "TypeVectMask".
 3795   return vt->isa_vectmask() != nullptr;
 3796 }
 3797 
 3798 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3799   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3800   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3801   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3802       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3803     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3804     return new legVecZOper();
 3805   }
 3806   if (legacy) {
 3807     switch (ideal_reg) {
 3808       case Op_VecS: return new legVecSOper();
 3809       case Op_VecD: return new legVecDOper();
 3810       case Op_VecX: return new legVecXOper();
 3811       case Op_VecY: return new legVecYOper();
 3812       case Op_VecZ: return new legVecZOper();
 3813     }
 3814   } else {
 3815     switch (ideal_reg) {
 3816       case Op_VecS: return new vecSOper();
 3817       case Op_VecD: return new vecDOper();
 3818       case Op_VecX: return new vecXOper();
 3819       case Op_VecY: return new vecYOper();
 3820       case Op_VecZ: return new vecZOper();
 3821     }
 3822   }
 3823   ShouldNotReachHere();
 3824   return nullptr;
 3825 }
 3826 
 3827 bool Matcher::is_reg2reg_move(MachNode* m) {
 3828   switch (m->rule()) {
 3829     case MoveVec2Leg_rule:
 3830     case MoveLeg2Vec_rule:
 3831     case MoveF2VL_rule:
 3832     case MoveF2LEG_rule:
 3833     case MoveVL2F_rule:
 3834     case MoveLEG2F_rule:
 3835     case MoveD2VL_rule:
 3836     case MoveD2LEG_rule:
 3837     case MoveVL2D_rule:
 3838     case MoveLEG2D_rule:
 3839       return true;
 3840     default:
 3841       return false;
 3842   }
 3843 }
 3844 
 3845 bool Matcher::is_generic_vector(MachOper* opnd) {
 3846   switch (opnd->opcode()) {
 3847     case VEC:
 3848     case LEGVEC:
 3849       return true;
 3850     default:
 3851       return false;
 3852   }
 3853 }
 3854 
 3855 //------------------------------------------------------------------------
 3856 
 3857 const RegMask* Matcher::predicate_reg_mask(void) {
 3858   return &_VECTMASK_REG_mask;
 3859 }
 3860 
 3861 // Max vector size in bytes. 0 if not supported.
 3862 int Matcher::vector_width_in_bytes(BasicType bt) {
 3863   assert(is_java_primitive(bt), "only primitive type vectors");
 3864   // SSE2 supports 128bit vectors for all types.
 3865   // AVX2 supports 256bit vectors for all types.
 3866   // AVX-512/EVEX supports 512bit vectors for all types.
 3867   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3868   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3869   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3870     size = (UseAVX > 2) ? 64 : 32;
 3871   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3872     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3873   // Use flag to limit vector size.
 3874   size = MIN2(size,(int)MaxVectorSize);
 3875   // Minimum 2 values in vector (or 4 for bytes).
 3876   switch (bt) {
 3877   case T_DOUBLE:
 3878   case T_LONG:
 3879     if (size < 16) return 0;
 3880     break;
 3881   case T_FLOAT:
 3882   case T_INT:
 3883     if (size < 8) return 0;
 3884     break;
 3885   case T_BOOLEAN:
 3886   case T_CHAR:
 3887   case T_BYTE:
 3888   case T_SHORT:
 3889     if (size < 4) return 0;
 3890     break;
 3897   default:
 3898     ShouldNotReachHere();
 3899   }
 3900   return size;
 3901 }
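      // Worked example: with UseAVX == 2 the starting size is (1 << 2) * 8 = 32
      // bytes (256-bit); with UseAVX == 3 it is 64 bytes, later capped by
      // MaxVectorSize.  E.g. UseAVX == 3 without avx512bw limits subword
      // (byte/short/char) vectors to 32 bytes.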
 3902 
 3903 // Limits on vector size (number of elements) loaded into vector.
 3904 int Matcher::max_vector_size(const BasicType bt) {
 3905   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3906 }
 3907 int Matcher::min_vector_size(const BasicType bt) {
 3908   int max_size = max_vector_size(bt);
 3909   // Min number of elements which can be loaded into a vector is 2 (4 for byte types), i.e. at least 4 bytes.
 3910   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3911   // Allow single-element (double64) vectors for SVML calls.
 3912   if (bt == T_DOUBLE) {
 3913     size = 1;
 3914   }
 3915   return MIN2(size,max_size);
 3916 }
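      // Consequently min_vector_size(T_BYTE) == 4 elements,
      // min_vector_size(T_INT) == 2 elements, and min_vector_size(T_DOUBLE) == 1
      // element (for SVML double64 calls), each clipped to max_vector_size(bt).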
 3917 
 3918 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3919   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3920   // by default on Cascade Lake
 3921   if (VM_Version::is_default_intel_cascade_lake()) {
 3922     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3923   }
 3924   return Matcher::max_vector_size(bt);
 3925 }
 3926 
 3927 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3928   return -1;
 3929 }
 3930 
 3931 // Vector ideal reg corresponding to specified size in bytes
 3932 uint Matcher::vector_ideal_reg(int size) {
 3933   assert(MaxVectorSize >= size, "");
 3934   switch(size) {
 3935     case  4: return Op_VecS;
 3936     case  8: return Op_VecD;
 3937     case 16: return Op_VecX;
 3938     case 32: return Op_VecY;
 3939     case 64: return Op_VecZ;
 3940   }
 3941   ShouldNotReachHere();
 3942   return 0;
 3943 }
 3944 
 3945 // Check for shift by small constant as well
 3946 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3947   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3948       shift->in(2)->get_int() <= 3 &&
 3949       // Are there other uses besides address expressions?
 3950       !matcher->is_visited(shift)) {
 3951     address_visited.set(shift->_idx); // Flag as address_visited
 3952     mstack.push(shift->in(2), Matcher::Visit);
 3953     Node *conv = shift->in(1);
 3954     // Allow the Matcher to match the rule which bypasses the
 3955     // ConvI2L operation for an array index on LP64
 3956     // if the index value is positive.
 3957     if (conv->Opcode() == Op_ConvI2L &&
 3958         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3959         // Are there other uses besides address expressions?
 3960         !matcher->is_visited(conv)) {
 3961       address_visited.set(conv->_idx); // Flag as address_visited
 3962       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3963     } else {
 3964       mstack.push(conv, Matcher::Pre_Visit);
 3965     }
 3966     return true;
 3967   }
 3968   return false;
 3969 }
 3970 
 3971 // This function identifies sub-graphs in which a 'load' node is
 3972 // input to two different nodes, such that the pattern can be matched
 3973 // with BMI instructions like blsi, blsr, etc.
 3974 // Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
 3975 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3976 // refers to the same node.
 3977 //
 3978 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3979 // This is a temporary solution until we make DAGs expressible in ADL.
 3980 template<typename ConType>
 3981 class FusedPatternMatcher {
 3982   Node* _op1_node;
 3983   Node* _mop_node;
 3984   int _con_op;
 3985 
 3986   static int match_next(Node* n, int next_op, int next_op_idx) {
 3987     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3988       return -1;
 3989     }
 3990 
 3991     if (next_op_idx == -1) { // n is commutative, try rotations
 3992       if (n->in(1)->Opcode() == next_op) {
 3993         return 1;
 3994       } else if (n->in(2)->Opcode() == next_op) {
 3995         return 2;
 3996       }
 3997     } else {
 3998       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3999       if (n->in(next_op_idx)->Opcode() == next_op) {
 4000         return next_op_idx;
 4001       }
 4002     }
 4003     return -1;
 4004   }
 4005 
 4006  public:
 4007   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4008     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4009 
 4010   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4011              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4012              typename ConType::NativeType con_value) {
 4013     if (_op1_node->Opcode() != op1) {
 4014       return false;
 4015     }
 4016     if (_mop_node->outcnt() > 2) {
 4017       return false;
 4018     }
 4019     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4020     if (op1_op2_idx == -1) {
 4021       return false;
 4022     }
 4023     // Memory operation must be the other edge
 4024     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4025 
 4026     // Check that the mop node is really what we want
 4027     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4028       Node* op2_node = _op1_node->in(op1_op2_idx);
 4029       if (op2_node->outcnt() > 1) {
 4030         return false;
 4031       }
 4032       assert(op2_node->Opcode() == op2, "Should be");
 4033       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4034       if (op2_con_idx == -1) {
 4035         return false;
 4036       }
 4037       // Memory operation must be the other edge
 4038       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4039       // Check that the memory operation is the same node
 4040       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4041         // Now check the constant
 4042         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4043         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4044           return true;
 4045         }
 4046       }
 4047     }
 4048     return false;
 4049   }
 4050 };
 4051 
 4052 static bool is_bmi_pattern(Node* n, Node* m) {
 4053   assert(UseBMI1Instructions, "sanity");
 4054   if (n != nullptr && m != nullptr) {
 4055     if (m->Opcode() == Op_LoadI) {
 4056       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4057       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4058              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4059              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4060     } else if (m->Opcode() == Op_LoadL) {
 4061       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4062       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4063              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4064              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4065     }
 4066   }
 4067   return false;
 4068 }
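      // The match() calls above correspond to the BMI1 instructions as follows
      // (int flavor shown; the long flavor is analogous):
      //   (AndI (SubI 0 x) x)  == -x & x     -> blsi   (isolate lowest set bit)
      //   (AndI (AddI x -1) x) == (x-1) & x  -> blsr   (reset lowest set bit)
      //   (XorI (AddI x -1) x) == (x-1) ^ x  -> blsmsk (mask up to lowest set bit)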
 4069 
 4070 // Should the matcher clone input 'm' of node 'n'?
 4071 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4072   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4073   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4074     mstack.push(m, Visit);
 4075     return true;
 4076   }
 4077   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4078     mstack.push(m, Visit);           // m = ShiftCntV
 4079     return true;
 4080   }
 4081   if (is_encode_and_store_pattern(n, m)) {
 4082     mstack.push(m, Visit);
 4083     return true;
 4084   }
 4085   return false;
 4086 }
 4087 
 4088 // Should the Matcher clone shifts on addressing modes, expecting them
 4089 // to be subsumed into complex addressing expressions or compute them
 4090 // into registers?
 4091 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4092   Node *off = m->in(AddPNode::Offset);
 4093   if (off->is_Con()) {
 4094     address_visited.test_set(m->_idx); // Flag as address_visited
 4095     Node *adr = m->in(AddPNode::Address);
 4096 
 4097     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4098     // AtomicAdd is not an addressing expression.
 4099     // Cheap to find it by looking for screwy base.
 4100     if (adr->is_AddP() &&
 4101         !adr->in(AddPNode::Base)->is_top() &&
 4102         !adr->in(AddPNode::Offset)->is_Con() &&
 4103         off->get_long() == (int) (off->get_long()) && // immL32
 4104         // Are there other uses besides address expressions?
 4105         !is_visited(adr)) {
 4106       address_visited.set(adr->_idx); // Flag as address_visited
 4107       Node *shift = adr->in(AddPNode::Offset);
 4108       if (!clone_shift(shift, this, mstack, address_visited)) {
 4109         mstack.push(shift, Pre_Visit);
 4110       }
 4111       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4112       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4113     } else {
 4114       mstack.push(adr, Pre_Visit);
 4115     }
 4116 
 4117     // Clone X+offset as it also folds into most addressing expressions
 4118     mstack.push(off, Visit);
 4119     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4120     return true;
 4121   } else if (clone_shift(off, this, mstack, address_visited)) {
 4122     address_visited.test_set(m->_idx); // Flag as address_visited
 4123     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4124     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4125     return true;
 4126   }
 4127   return false;
 4128 }
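      // Worked example: for an address such as base + (ConvI2L(i) << 3) + 16,
      // the cloning above lets the matcher fold the whole expression into a
      // single x86 addressing mode, [base + i*8 + 16], instead of first
      // computing the scaled index into a register.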
 4129 
 4130 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4131   switch (bt) {
 4132     case BoolTest::eq:
 4133       return Assembler::eq;
 4134     case BoolTest::ne:
 4135       return Assembler::neq;
 4136     case BoolTest::le:
 4137     case BoolTest::ule:
 4138       return Assembler::le;
 4139     case BoolTest::ge:
 4140     case BoolTest::uge:
 4141       return Assembler::nlt;
 4142     case BoolTest::lt:
 4143     case BoolTest::ult:
 4144       return Assembler::lt;
 4145     case BoolTest::gt:
 4146     case BoolTest::ugt:
 4147       return Assembler::nle;
 4148     default: ShouldNotReachHere(); return Assembler::_false;
 4149   }
 4150 }
 4151 
 4152 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4153   switch (bt) {
 4154   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4155   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4156   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4157   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4158   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4159   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4160   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4161   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4162   }
 4163 }
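      // For example, GT_OQ yields false when either operand is NaN (the compare
      // is ordered), matching Java's rule that relational comparisons involving
      // NaN are false, while NEQ_UQ (unordered) makes NaN != NaN yield true.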
 4164 
 4165 // Helper methods for MachSpillCopyNode::implementation().
 4166 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4167                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4168   assert(ireg == Op_VecS || // 32bit vector
 4169          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4170           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4171          "no non-adjacent vector moves" );
 4172   if (masm) {
 4173     switch (ireg) {
 4174     case Op_VecS: // copy whole register
 4175     case Op_VecD:
 4176     case Op_VecX:
 4177       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4178         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4179       } else {
 4180         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4181       }
 4182       break;
 4183     case Op_VecY:
 4184       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4185         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4186       } else {
 4187         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4188       }
 4189       break;
 4190     case Op_VecZ:
 4191       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4192       break;
 4193     default:
 4194       ShouldNotReachHere();
 4195     }
 4196 #ifndef PRODUCT
 4197   } else {
 4198     switch (ireg) {
 4199     case Op_VecS:
 4200     case Op_VecD:
 4201     case Op_VecX:
 4202       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4203       break;
 4204     case Op_VecY:
 4205     case Op_VecZ:
 4206       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4207       break;
 4208     default:
 4209       ShouldNotReachHere();
 4210     }
 4211 #endif
 4212   }
 4213 }
 4214 
 4215 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4216                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4217   if (masm) {
 4218     if (is_load) {
 4219       switch (ireg) {
 4220       case Op_VecS:
 4221         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4222         break;
 4223       case Op_VecD:
 4224         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4225         break;
 4226       case Op_VecX:
 4227         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4228           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4229         } else {
 4230           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4231           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4232         }
 4233         break;
 4234       case Op_VecY:
 4235         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4236           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4237         } else {
 4238           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4239           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4240         }
 4241         break;
 4242       case Op_VecZ:
 4243         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4244         break;
 4245       default:
 4246         ShouldNotReachHere();
 4247       }
 4248     } else { // store
 4249       switch (ireg) {
 4250       case Op_VecS:
 4251         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4252         break;
 4253       case Op_VecD:
 4254         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4255         break;
 4256       case Op_VecX:
 4257         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4258           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4259         } else {
 4261           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4262         }
 4263         break;
 4264       case Op_VecY:
 4265         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4266           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4267         } else {
 4269           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4270         }
 4271         break;
 4272       case Op_VecZ:
 4273         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4274         break;
 4275       default:
 4276         ShouldNotReachHere();
 4277       }
 4278     }
 4279 #ifndef PRODUCT
 4280   } else {
 4281     if (is_load) {
 4282       switch (ireg) {
 4283       case Op_VecS:
 4284         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4285         break;
 4286       case Op_VecD:
 4287         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4288         break;
 4289        case Op_VecX:
 4290         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4291         break;
 4292       case Op_VecY:
 4293       case Op_VecZ:
 4294         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4295         break;
 4296       default:
 4297         ShouldNotReachHere();
 4298       }
 4299     } else { // store
 4300       switch (ireg) {
 4301       case Op_VecS:
 4302         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4303         break;
 4304       case Op_VecD:
 4305         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4306         break;
 4307        case Op_VecX:
 4308         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4309         break;
 4310       case Op_VecY:
 4311       case Op_VecZ:
 4312         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4313         break;
 4314       default:
 4315         ShouldNotReachHere();
 4316       }
 4317     }
 4318 #endif
 4319   }
 4320 }
 4321 
 4322 template <class T>
 4323 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4324   int size = type2aelembytes(bt) * len;
 4325   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4326   for (int i = 0; i < len; i++) {
 4327     int offset = i * type2aelembytes(bt);
 4328     switch (bt) {
 4329       case T_BYTE: val->at(i) = con; break;
 4330       case T_SHORT: {
 4331         jshort c = con;
 4332         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4333         break;
 4334       }
 4335       case T_INT: {
 4336         jint c = con;
 4337         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4338         break;
 4339       }
 4340       case T_LONG: {
 4341         jlong c = con;
 4342         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4343         break;
 4344       }
 4345       case T_FLOAT: {
 4346         jfloat c = con;
 4347         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4348         break;
 4349       }
 4350       case T_DOUBLE: {
 4351         jdouble c = con;
 4352         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4353         break;
 4354       }
 4355       default: assert(false, "%s", type2name(bt));
 4356     }
 4357   }
 4358   return val;
 4359 }
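      // Illustrative use: vreplicate_imm(T_SHORT, (jshort)1, 4) produces the
      // little-endian byte image 01 00 01 00 01 00 01 00, i.e. the 16-bit
      // constant broadcast across a 64-bit lane.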
 4360 
 4361 static inline jlong high_bit_set(BasicType bt) {
 4362   switch (bt) {
 4363     case T_BYTE:  return 0x8080808080808080;
 4364     case T_SHORT: return 0x8000800080008000;
 4365     case T_INT:   return 0x8000000080000000;
 4366     case T_LONG:  return 0x8000000000000000;
 4367     default:
 4368       ShouldNotReachHere();
 4369       return 0;
 4370   }
 4371 }
 4372 
 4373 #ifndef PRODUCT
 4374   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4375     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4376   }
 4377 #endif
 4378 
 4379   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4380     __ nop(_count);
 4381   }
 4382 
 4383   uint MachNopNode::size(PhaseRegAlloc*) const {
 4384     return _count;
 4385   }
 4386 
 4387 #ifndef PRODUCT
 4388   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4389     st->print("# breakpoint");
 4390   }
 4391 #endif
 4392 
 4393   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4394     __ int3();
 4395   }
 4396 
 4397   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4398     return MachNode::size(ra_);
 4399   }
 4400 
 4401 %}
 4402 
 4403 //----------ENCODING BLOCK-----------------------------------------------------
 4404 // This block specifies the encoding classes used by the compiler to
 4405 // output byte streams.  Encoding classes are parameterized macros
 4406 // used by Machine Instruction Nodes in order to generate the bit
 4407 // encoding of the instruction.  Operands specify their base encoding
 4408 // interface with the interface keyword.  Four interfaces are
 4409 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
 4410 // COND_INTER.  REG_INTER causes an operand to generate a function
 4411 // which returns its register number when queried.  CONST_INTER causes
 4412 // an operand to generate a function which returns the value of the
 4413 // constant when queried.  MEMORY_INTER causes an operand to generate
 4414 // four functions which return the Base Register, the Index Register,
 4415 // the Scale Value, and the Offset Value of the operand when queried.
 4416 // COND_INTER causes an operand to generate six functions which return
 4417 // the encoding code (ie - encoding bits for the instruction)
 4418 // associated with each basic boolean condition for a conditional
 4419 // instruction.
 4420 //
 4421 // Instructions specify two basic values for encoding.  A function is
 4422 // available to check whether a constant displacement is an oop.
 4423 // Instructions use the ins_encode keyword to specify their encoding
 4424 // classes (which must be a sequence of enc_class names, and their
 4425 // parameters, specified in the encoding block), and they use the
 4426 // opcode keyword to specify, in order, their primary, secondary, and
 4427 // tertiary opcode.  Only the opcode sections which a particular
 4428 // instruction needs for encoding need to be specified.
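      // For example (hypothetical rule, shown only to illustrate the keywords),
      // an instruct entry ties these pieces together roughly like:
      //   instruct addI_rReg(rRegI dst, rRegI src) %{
      //     match(Set dst (AddI dst src));
      //     ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
      //   %}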
 4429 encode %{
 4430   enc_class cdql_enc(no_rax_rdx_RegI div)
 4431   %{
 4432     // Full implementation of Java idiv and irem; checks for
 4433     // special case as described in JVM spec., p.243 & p.271.
 4434     //
 4435     //         normal case                           special case
 4436     //
 4437     // input : rax: dividend                         min_int
 4438     //         reg: divisor                          -1
 4439     //
 4440     // output: rax: quotient  (= rax idiv reg)       min_int
 4441     //         rdx: remainder (= rax irem reg)       0
 4442     //
 4443     //  Code sequence:
 4444     //
 4445     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4446     //    5:   75 07/08                jne    e <normal>
 4447     //    7:   33 d2                   xor    %edx,%edx
 4448     //  [div >= 8 -> offset + 1]
 4449     //  [REX_B]
 4450     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4451     //    c:   74 03/04                je     11 <done>
 4452     // 000000000000000e <normal>:
 4453     //    e:   99                      cltd
 4454     //  [div >= 8 -> offset + 1]
 4455     //  [REX_B]
 4456     //    f:   f7 f9                   idiv   $div
 4457     // 0000000000000011 <done>:
 4458     Label normal;
 4459     Label done;
 4460 
 4461     // cmp    $0x80000000,%eax
 4462     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4463 
 4464     // jne    e <normal>
 4465     __ jccb(Assembler::notEqual, normal);
 4466 
 4467     // xor    %edx,%edx
 4468     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4469 
 4470     // cmp    $0xffffffffffffffff,$div
 4471     __ cmpl($div$$Register, -1);
 4472 
 4473     // je     11 <done>
 4474     __ jccb(Assembler::equal, done);
 4475 
 4476     // <normal>
 4477     // cltd
 4478     __ bind(normal);
 4479     __ cdql();
 4480 
 4481     // idivl
 4482     // <done>
 4483     __ idivl($div$$Register);
 4484     __ bind(done);
 4485   %}
 4486 
 4487   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4488   %{
 4489     // Full implementation of Java ldiv and lrem; checks for
 4490     // special case as described in JVM spec., p.243 & p.271.
 4491     //
 4492     //         normal case                           special case
 4493     //
 4494     // input : rax: dividend                         min_long
 4495     //         reg: divisor                          -1
 4496     //
 4497     // output: rax: quotient  (= rax idiv reg)       min_long
 4498     //         rdx: remainder (= rax irem reg)       0
 4499     //
 4500     //  Code sequence:
 4501     //
 4502     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4503     //    7:   00 00 80
 4504     //    a:   48 39 d0                cmp    %rdx,%rax
 4505     //    d:   75 08                   jne    17 <normal>
 4506     //    f:   33 d2                   xor    %edx,%edx
 4507     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4508     //   15:   74 05                   je     1c <done>
 4509     // 0000000000000017 <normal>:
 4510     //   17:   48 99                   cqto
 4511     //   19:   48 f7 f9                idiv   $div
 4512     // 000000000000001c <done>:
 4513     Label normal;
 4514     Label done;
 4515 
 4516     // mov    $0x8000000000000000,%rdx
 4517     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4518 
 4519     // cmp    %rdx,%rax
 4520     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4521 
 4522     // jne    17 <normal>
 4523     __ jccb(Assembler::notEqual, normal);
 4524 
 4525     // xor    %edx,%edx
 4526     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4527 
 4528     // cmp    $0xffffffffffffffff,$div
 4529     __ cmpq($div$$Register, -1);
 4530 
 4531     // je     1c <done>
 4532     __ jccb(Assembler::equal, done);
 4533 
 4534     // <normal>
 4535     // cqto
 4536     __ bind(normal);
 4537     __ cdqq();
 4538 
 4539     // idivq
 4540     // <done>
 4541     __ idivq($div$$Register);
 4542     __ bind(done);
 4543   %}
 4544 
 4545   enc_class clear_avx %{
 4546     DEBUG_ONLY(int off0 = __ offset());
 4547     if (generate_vzeroupper(Compile::current())) {
 4549       // Clear upper bits of YMM registers when current compiled code uses
 4550       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4551       __ vzeroupper();
 4552     }
 4553     DEBUG_ONLY(int off1 = __ offset());
 4554     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4555   %}
 4556 
 4557   enc_class Java_To_Runtime(method meth) %{
 4558     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4559     __ call(r10);
 4560     __ post_call_nop();
 4561   %}
 4562 
 4563   enc_class Java_Static_Call(method meth)
 4564   %{
 4565     // JAVA STATIC CALL
 4566     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4567     // determine who we intended to call.
 4568     if (!_method) {
 4569       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4570     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4571       // The NOP here is purely to ensure that eliding a call to
 4572       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4573       __ addr_nop_5();
 4574       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4575     } else {
 4576       int method_index = resolved_method_index(masm);
 4577       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4578                                                   : static_call_Relocation::spec(method_index);
 4579       address mark = __ pc();
 4580       int call_offset = __ offset();
 4581       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4582       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4583         // Calls of the same statically bound method can share
 4584         // a stub to the interpreter.
 4585         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4586       } else {
 4587         // Emit stubs for static call.
 4588         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4589         __ clear_inst_mark();
 4590         if (stub == nullptr) {
 4591           ciEnv::current()->record_failure("CodeCache is full");
 4592           return;
 4593         }
 4594       }
 4595     }
 4596     __ post_call_nop();
 4597   %}
 4598 
 4599   enc_class Java_Dynamic_Call(method meth) %{
 4600     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4601     __ post_call_nop();
 4602   %}
 4603 
 4604   enc_class call_epilog %{
 4605     if (VerifyStackAtCalls) {
 4606       // Check that stack depth is unchanged: find majik cookie on stack
 4607       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4608       Label L;
 4609       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4610       __ jccb(Assembler::equal, L);
 4611       // Die if stack mismatch
 4612       __ int3();
 4613       __ bind(L);
 4614     }
 4615     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4616       // The last return value is not set by the callee but is used to pass the null marker to compiled code.
 4617       // Search for the corresponding projection, get the register and emit code that initializes it.
 4618       uint con = (tf()->range_cc()->cnt() - 1);
 4619       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4620         ProjNode* proj = fast_out(i)->as_Proj();
 4621         if (proj->_con == con) {
 4622           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4623           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4624           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4625           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4626           __ testq(rax, rax);
 4627           __ setb(Assembler::notZero, toReg);
 4628           __ movzbl(toReg, toReg);
 4629           if (reg->is_stack()) {
 4630             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4631             __ movq(Address(rsp, st_off), toReg);
 4632           }
 4633           break;
 4634         }
 4635       }
 4636       if (return_value_is_used()) {
 4637         // An inline type is returned as fields in multiple registers.
 4638         // Rax either contains an oop if the inline type is buffered or a pointer
 4639         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4640         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4641         // rax &= (rax & 1) - 1
 4642         __ movptr(rscratch1, rax);
 4643         __ andptr(rscratch1, 0x1);
 4644         __ subptr(rscratch1, 0x1);
 4645         __ andptr(rax, rscratch1);
 4646       }
 4647     }
 4648   %}
 4649 
 4650 %}
 4651 
 4652 //----------FRAME--------------------------------------------------------------
 4653 // Definition of frame structure and management information.
 4654 //
 4655 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4656 //                             |   (to get allocators register number
 4657 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4658 //  r   CALLER     |        |
 4659 //  o     |        +--------+      pad to even-align allocators stack-slot
 4660 //  w     V        |  pad0  |        numbers; owned by CALLER
 4661 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4662 //  h     ^        |   in   |  5
 4663 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4664 //  |     |        |        |  3
 4665 //  |     |        +--------+
 4666 //  V     |        | old out|      Empty on Intel, window on Sparc
 4667 //        |    old |preserve|      Must be even aligned.
 4668 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4669 //        |        |   in   |  3   area for Intel ret address
 4670 //     Owned by    |preserve|      Empty on Sparc.
 4671 //       SELF      +--------+
 4672 //        |        |  pad2  |  2   pad to align old SP
 4673 //        |        +--------+  1
 4674 //        |        | locks  |  0
 4675 //        |        +--------+----> OptoReg::stack0(), even aligned
 4676 //        |        |  pad1  | 11   pad to align new SP
 4677 //        |        +--------+
 4678 //        |        |        | 10
 4679 //        |        | spills |  9   spills
 4680 //        V        |        |  8   (pad0 slot for callee)
 4681 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4682 //        ^        |  out   |  7
 4683 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4684 //     Owned by    +--------+
 4685 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4686 //        |    new |preserve|      Must be even-aligned.
 4687 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4688 //        |        |        |
 4689 //
 4690 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4691 //         known from SELF's arguments and the Java calling convention.
 4692 //         Region 6-7 is determined per call site.
 4693 // Note 2: If the calling convention leaves holes in the incoming argument
 4694 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4695 //         are owned by the CALLEE.  Holes should not be necessary in the
 4696 //         incoming area, as the Java calling convention is completely under
 4697 //         the control of the AD file.  Doubles can be sorted and packed to
 4698 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4699 //         varargs C calling conventions.
 4700 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4701 //         even aligned with pad0 as needed.
 4702 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4703 //         region 6-11 is even aligned; it may be padded out more so that
 4704 //         the region from SP to FP meets the minimum stack alignment.
 4705 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4706 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4707 //         SP meets the minimum alignment.
 4708 
 4709 frame
 4710 %{
 4711   // These three registers define part of the calling convention
 4712   // between compiled code and the interpreter.
 4713   inline_cache_reg(RAX);                // Inline Cache Register
 4714 
 4715   // Optional: name the operand used by cisc-spilling to access
 4716   // [stack_pointer + offset]
 4717   cisc_spilling_operand_name(indOffset32);
 4718 
 4719   // Number of stack slots consumed by locking an object
 4720   sync_stack_slots(2);
 4721 
 4722   // Compiled code's Frame Pointer
 4723   frame_pointer(RSP);
 4724 
 4725   // Interpreter stores its frame pointer in a register which is
 4726   // stored to the stack by I2CAdaptors.
 4727   // I2CAdaptors convert from interpreted Java to compiled Java.
 4728   interpreter_frame_pointer(RBP);
 4729 
 4730   // Stack alignment requirement
 4731   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4732 
 4733   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4734   // for calls to C.  Supports the var-args backing area for register parms.
 4735   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4736 
 4737   // The after-PROLOG location of the return address.  Location of
 4738   // return address specifies a type (REG or STACK) and a number
 4739   // representing the register number (i.e. - use a register name) or
 4740   // stack slot.
 4741   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4742   // Otherwise, it is above the locks and verification slot and alignment word
 4743   return_addr(STACK - 2 +
 4744               align_up((Compile::current()->in_preserve_stack_slots() +
 4745                         Compile::current()->fixed_slots()),
 4746                        stack_alignment_in_slots()));
 4747 
 4748   // Location of compiled Java return values.  Same as C for now.
 4749   return_value
 4750   %{
 4751     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4752            "only return normal values");
 4753 
 4754     static const int lo[Op_RegL + 1] = {
 4755       0,
 4756       0,
 4757       RAX_num,  // Op_RegN
 4758       RAX_num,  // Op_RegI
 4759       RAX_num,  // Op_RegP
 4760       XMM0_num, // Op_RegF
 4761       XMM0_num, // Op_RegD
 4762       RAX_num   // Op_RegL
 4763     };
 4764     static const int hi[Op_RegL + 1] = {
 4765       0,
 4766       0,
 4767       OptoReg::Bad, // Op_RegN
 4768       OptoReg::Bad, // Op_RegI
 4769       RAX_H_num,    // Op_RegP
 4770       OptoReg::Bad, // Op_RegF
 4771       XMM0b_num,    // Op_RegD
 4772       RAX_H_num     // Op_RegL
 4773     };
 4774     // Excludes the flags and vector registers.
 4775     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4776     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4777   %}
 4778 %}
 4779 
 4780 //----------ATTRIBUTES---------------------------------------------------------
 4781 //----------Operand Attributes-------------------------------------------------
 4782 op_attrib op_cost(0);        // Required cost attribute
 4783 
 4784 //----------Instruction Attributes---------------------------------------------
 4785 ins_attrib ins_cost(100);       // Required cost attribute
 4786 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4787 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4788                                 // a non-matching short branch variant
 4789                                 // of some long branch?
 4790 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4791                                 // be a power of 2) specifies the
 4792                                 // alignment that some part of the
 4793                                 // instruction (not necessarily the
 4794                                 // start) requires.  If > 1, a
 4795                                 // compute_padding() function must be
 4796                                 // provided for the instruction
 4797 
 4798 // Whether this node is expanded during code emission into a sequence of
 4799 // instructions and the first instruction can perform an implicit null check.
 4800 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4801 
 4802 //----------OPERANDS-----------------------------------------------------------
 4803 // Operand definitions must precede instruction definitions for correct parsing
 4804 // in the ADLC because operands constitute user defined types which are used in
 4805 // instruction definitions.
 4806 
 4807 //----------Simple Operands----------------------------------------------------
 4808 // Immediate Operands
 4809 // Integer Immediate
 4810 operand immI()
 4811 %{
 4812   match(ConI);
 4813 
 4814   op_cost(10);
 4815   format %{ %}
 4816   interface(CONST_INTER);
 4817 %}
 4818 
 4819 // Constant for test vs zero
 4820 operand immI_0()
 4821 %{
 4822   predicate(n->get_int() == 0);
 4823   match(ConI);
 4824 
 4825   op_cost(0);
 4826   format %{ %}
 4827   interface(CONST_INTER);
 4828 %}
 4829 
 4830 // Constant for increment
 4831 operand immI_1()
 4832 %{
 4833   predicate(n->get_int() == 1);
 4834   match(ConI);
 4835 
 4836   op_cost(0);
 4837   format %{ %}
 4838   interface(CONST_INTER);
 4839 %}
 4840 
 4841 // Constant for decrement
 4842 operand immI_M1()
 4843 %{
 4844   predicate(n->get_int() == -1);
 4845   match(ConI);
 4846 
 4847   op_cost(0);
 4848   format %{ %}
 4849   interface(CONST_INTER);
 4850 %}
 4851 
 4852 operand immI_2()
 4853 %{
 4854   predicate(n->get_int() == 2);
 4855   match(ConI);
 4856 
 4857   op_cost(0);
 4858   format %{ %}
 4859   interface(CONST_INTER);
 4860 %}
 4861 
 4862 operand immI_4()
 4863 %{
 4864   predicate(n->get_int() == 4);
 4865   match(ConI);
 4866 
 4867   op_cost(0);
 4868   format %{ %}
 4869   interface(CONST_INTER);
 4870 %}
 4871 
 4872 operand immI_8()
 4873 %{
 4874   predicate(n->get_int() == 8);
 4875   match(ConI);
 4876 
 4877   op_cost(0);
 4878   format %{ %}
 4879   interface(CONST_INTER);
 4880 %}
 4881 
 4882 // Valid scale values for addressing modes
 4883 operand immI2()
 4884 %{
 4885   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4886   match(ConI);
 4887 
 4888   format %{ %}
 4889   interface(CONST_INTER);
 4890 %}
 4891 
 4892 operand immU7()
 4893 %{
 4894   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4895   match(ConI);
 4896 
 4897   op_cost(5);
 4898   format %{ %}
 4899   interface(CONST_INTER);
 4900 %}
 4901 
 4902 operand immI8()
 4903 %{
 4904   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4905   match(ConI);
 4906 
 4907   op_cost(5);
 4908   format %{ %}
 4909   interface(CONST_INTER);
 4910 %}
 4911 
 4912 operand immU8()
 4913 %{
 4914   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4915   match(ConI);
 4916 
 4917   op_cost(5);
 4918   format %{ %}
 4919   interface(CONST_INTER);
 4920 %}
 4921 
 4922 operand immI16()
 4923 %{
 4924   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4925   match(ConI);
 4926 
 4927   op_cost(10);
 4928   format %{ %}
 4929   interface(CONST_INTER);
 4930 %}
 4931 
 4932 // Int Immediate non-negative
 4933 operand immU31()
 4934 %{
 4935   predicate(n->get_int() >= 0);
 4936   match(ConI);
 4937 
 4938   op_cost(0);
 4939   format %{ %}
 4940   interface(CONST_INTER);
 4941 %}
 4942 
 4943 // Pointer Immediate
 4944 operand immP()
 4945 %{
 4946   match(ConP);
 4947 
 4948   op_cost(10);
 4949   format %{ %}
 4950   interface(CONST_INTER);
 4951 %}
 4952 
 4953 // Null Pointer Immediate
 4954 operand immP0()
 4955 %{
 4956   predicate(n->get_ptr() == 0);
 4957   match(ConP);
 4958 
 4959   op_cost(5);
 4960   format %{ %}
 4961   interface(CONST_INTER);
 4962 %}
 4963 
 4964 // Pointer Immediate
 4965 operand immN() %{
 4966   match(ConN);
 4967 
 4968   op_cost(10);
 4969   format %{ %}
 4970   interface(CONST_INTER);
 4971 %}
 4972 
 4973 operand immNKlass() %{
 4974   match(ConNKlass);
 4975 
 4976   op_cost(10);
 4977   format %{ %}
 4978   interface(CONST_INTER);
 4979 %}
 4980 
 4981 // Null Pointer Immediate
 4982 operand immN0() %{
 4983   predicate(n->get_narrowcon() == 0);
 4984   match(ConN);
 4985 
 4986   op_cost(5);
 4987   format %{ %}
 4988   interface(CONST_INTER);
 4989 %}
 4990 
 4991 operand immP31()
 4992 %{
 4993   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4994             && (n->get_ptr() >> 31) == 0);
 4995   match(ConP);
 4996 
 4997   op_cost(5);
 4998   format %{ %}
 4999   interface(CONST_INTER);
 5000 %}
 5001 
 5002 
 5003 // Long Immediate
 5004 operand immL()
 5005 %{
 5006   match(ConL);
 5007 
 5008   op_cost(20);
 5009   format %{ %}
 5010   interface(CONST_INTER);
 5011 %}
 5012 
 5013 // Long Immediate 8-bit
 5014 operand immL8()
 5015 %{
 5016   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 5017   match(ConL);
 5018 
 5019   op_cost(5);
 5020   format %{ %}
 5021   interface(CONST_INTER);
 5022 %}
 5023 
 5024 // Long Immediate 32-bit unsigned
 5025 operand immUL32()
 5026 %{
 5027   predicate(n->get_long() == (unsigned int) (n->get_long()));
 5028   match(ConL);
 5029 
 5030   op_cost(10);
 5031   format %{ %}
 5032   interface(CONST_INTER);
 5033 %}
 5034 
 5035 // Long Immediate 32-bit signed
 5036 operand immL32()
 5037 %{
 5038   predicate(n->get_long() == (int) (n->get_long()));
 5039   match(ConL);
 5040 
 5041   op_cost(15);
 5042   format %{ %}
 5043   interface(CONST_INTER);
 5044 %}
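      // Note the distinction (sketch): -1L satisfies immL32 (it sign-extends
      // from 32 bits) but not immUL32, while 0xFFFFFFFFL satisfies immUL32
      // (it zero-extends) but not immL32.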
 5045 
 5046 operand immL_Pow2()
 5047 %{
 5048   predicate(is_power_of_2((julong)n->get_long()));
 5049   match(ConL);
 5050 
 5051   op_cost(15);
 5052   format %{ %}
 5053   interface(CONST_INTER);
 5054 %}
 5055 
 5056 operand immL_NotPow2()
 5057 %{
 5058   predicate(is_power_of_2((julong)~n->get_long()));
 5059   match(ConL);
 5060 
 5061   op_cost(15);
 5062   format %{ %}
 5063   interface(CONST_INTER);
 5064 %}
 5065 
 5066 // Long Immediate zero
 5067 operand immL0()
 5068 %{
 5069   predicate(n->get_long() == 0L);
 5070   match(ConL);
 5071 
 5072   op_cost(10);
 5073   format %{ %}
 5074   interface(CONST_INTER);
 5075 %}
 5076 
 5077 // Constant for increment
 5078 operand immL1()
 5079 %{
 5080   predicate(n->get_long() == 1);
 5081   match(ConL);
 5082 
 5083   format %{ %}
 5084   interface(CONST_INTER);
 5085 %}
 5086 
 5087 // Constant for decrement
 5088 operand immL_M1()
 5089 %{
 5090   predicate(n->get_long() == -1);
 5091   match(ConL);
 5092 
 5093   format %{ %}
 5094   interface(CONST_INTER);
 5095 %}
 5096 
 5097 // Long Immediate: low 32-bit mask
 5098 operand immL_32bits()
 5099 %{
 5100   predicate(n->get_long() == 0xFFFFFFFFL);
 5101   match(ConL);
 5102   op_cost(20);
 5103 
 5104   format %{ %}
 5105   interface(CONST_INTER);
 5106 %}
 5107 
 5108 // Int Immediate: 2^n-1, positive
 5109 operand immI_Pow2M1()
 5110 %{
 5111   predicate((n->get_int() > 0)
 5112             && is_power_of_2((juint)n->get_int() + 1));
 5113   match(ConI);
 5114 
 5115   op_cost(20);
 5116   format %{ %}
 5117   interface(CONST_INTER);
 5118 %}
 5119 
 5120 // Float Immediate zero
 5121 operand immF0()
 5122 %{
 5123   predicate(jint_cast(n->getf()) == 0);
 5124   match(ConF);
 5125 
 5126   op_cost(5);
 5127   format %{ %}
 5128   interface(CONST_INTER);
 5129 %}
 5130 
 5131 // Float Immediate
 5132 operand immF()
 5133 %{
 5134   match(ConF);
 5135 
 5136   op_cost(15);
 5137   format %{ %}
 5138   interface(CONST_INTER);
 5139 %}
 5140 
 5141 // Half Float Immediate
 5142 operand immH()
 5143 %{
 5144   match(ConH);
 5145 
 5146   op_cost(15);
 5147   format %{ %}
 5148   interface(CONST_INTER);
 5149 %}
 5150 
 5151 // Double Immediate zero
 5152 operand immD0()
 5153 %{
 5154   predicate(jlong_cast(n->getd()) == 0);
 5155   match(ConD);
 5156 
 5157   op_cost(5);
 5158   format %{ %}
 5159   interface(CONST_INTER);
 5160 %}
 5161 
 5162 // Double Immediate
 5163 operand immD()
 5164 %{
 5165   match(ConD);
 5166 
 5167   op_cost(15);
 5168   format %{ %}
 5169   interface(CONST_INTER);
 5170 %}
 5171 
 5172 // Immediates for special shifts (sign extend)
 5173 
 5174 // Constants for sign-extending shifts
 5175 operand immI_16()
 5176 %{
 5177   predicate(n->get_int() == 16);
 5178   match(ConI);
 5179 
 5180   format %{ %}
 5181   interface(CONST_INTER);
 5182 %}
 5183 
 5184 operand immI_24()
 5185 %{
 5186   predicate(n->get_int() == 24);
 5187   match(ConI);
 5188 
 5189   format %{ %}
 5190   interface(CONST_INTER);
 5191 %}
 5192 
 5193 // Constant for byte-wide masking
 5194 operand immI_255()
 5195 %{
 5196   predicate(n->get_int() == 255);
 5197   match(ConI);
 5198 
 5199   format %{ %}
 5200   interface(CONST_INTER);
 5201 %}
 5202 
 5203 // Constant for short-wide masking
 5204 operand immI_65535()
 5205 %{
 5206   predicate(n->get_int() == 65535);
 5207   match(ConI);
 5208 
 5209   format %{ %}
 5210   interface(CONST_INTER);
 5211 %}
 5212 
 5213 // Constant for byte-wide masking
 5214 operand immL_255()
 5215 %{
 5216   predicate(n->get_long() == 255);
 5217   match(ConL);
 5218 
 5219   format %{ %}
 5220   interface(CONST_INTER);
 5221 %}
 5222 
 5223 // Constant for short-wide masking
 5224 operand immL_65535()
 5225 %{
 5226   predicate(n->get_long() == 65535);
 5227   match(ConL);
 5228 
 5229   format %{ %}
 5230   interface(CONST_INTER);
 5231 %}
 5232 
 5233 operand kReg()
 5234 %{
 5235   constraint(ALLOC_IN_RC(vectmask_reg));
 5236   match(RegVectMask);
5237   format %{ %}
 5238   interface(REG_INTER);
 5239 %}
 5240 
 5241 // Register Operands
 5242 // Integer Register
 5243 operand rRegI()
 5244 %{
 5245   constraint(ALLOC_IN_RC(int_reg));
 5246   match(RegI);
 5247 
 5248   match(rax_RegI);
 5249   match(rbx_RegI);
 5250   match(rcx_RegI);
 5251   match(rdx_RegI);
 5252   match(rdi_RegI);
 5253 
 5254   format %{ %}
 5255   interface(REG_INTER);
 5256 %}
 5257 
 5258 // Special Registers
 5259 operand rax_RegI()
 5260 %{
 5261   constraint(ALLOC_IN_RC(int_rax_reg));
 5262   match(RegI);
 5263   match(rRegI);
 5264 
 5265   format %{ "RAX" %}
 5266   interface(REG_INTER);
 5267 %}
 5268 
 5269 // Special Registers
 5270 operand rbx_RegI()
 5271 %{
 5272   constraint(ALLOC_IN_RC(int_rbx_reg));
 5273   match(RegI);
 5274   match(rRegI);
 5275 
 5276   format %{ "RBX" %}
 5277   interface(REG_INTER);
 5278 %}
 5279 
 5280 operand rcx_RegI()
 5281 %{
 5282   constraint(ALLOC_IN_RC(int_rcx_reg));
 5283   match(RegI);
 5284   match(rRegI);
 5285 
 5286   format %{ "RCX" %}
 5287   interface(REG_INTER);
 5288 %}
 5289 
 5290 operand rdx_RegI()
 5291 %{
 5292   constraint(ALLOC_IN_RC(int_rdx_reg));
 5293   match(RegI);
 5294   match(rRegI);
 5295 
 5296   format %{ "RDX" %}
 5297   interface(REG_INTER);
 5298 %}
 5299 
 5300 operand rdi_RegI()
 5301 %{
 5302   constraint(ALLOC_IN_RC(int_rdi_reg));
 5303   match(RegI);
 5304   match(rRegI);
 5305 
 5306   format %{ "RDI" %}
 5307   interface(REG_INTER);
 5308 %}
 5309 
 5310 operand no_rax_rdx_RegI()
 5311 %{
 5312   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5313   match(RegI);
 5314   match(rbx_RegI);
 5315   match(rcx_RegI);
 5316   match(rdi_RegI);
 5317 
 5318   format %{ %}
 5319   interface(REG_INTER);
 5320 %}
 5321 
 5322 operand no_rbp_r13_RegI()
 5323 %{
 5324   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5325   match(RegI);
 5326   match(rRegI);
 5327   match(rax_RegI);
 5328   match(rbx_RegI);
 5329   match(rcx_RegI);
 5330   match(rdx_RegI);
 5331   match(rdi_RegI);
 5332 
 5333   format %{ %}
 5334   interface(REG_INTER);
 5335 %}
 5336 
 5337 // Pointer Register
 5338 operand any_RegP()
 5339 %{
 5340   constraint(ALLOC_IN_RC(any_reg));
 5341   match(RegP);
 5342   match(rax_RegP);
 5343   match(rbx_RegP);
 5344   match(rdi_RegP);
 5345   match(rsi_RegP);
 5346   match(rbp_RegP);
 5347   match(r15_RegP);
 5348   match(rRegP);
 5349 
 5350   format %{ %}
 5351   interface(REG_INTER);
 5352 %}
 5353 
 5354 operand rRegP()
 5355 %{
 5356   constraint(ALLOC_IN_RC(ptr_reg));
 5357   match(RegP);
 5358   match(rax_RegP);
 5359   match(rbx_RegP);
 5360   match(rdi_RegP);
 5361   match(rsi_RegP);
 5362   match(rbp_RegP);  // See Q&A below about
 5363   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5364 
 5365   format %{ %}
 5366   interface(REG_INTER);
 5367 %}
 5368 
 5369 operand rRegN() %{
 5370   constraint(ALLOC_IN_RC(int_reg));
 5371   match(RegN);
 5372 
 5373   format %{ %}
 5374   interface(REG_INTER);
 5375 %}
 5376 
5377 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5378 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5379 // It's fine for an instruction input that expects rRegP to match an r15_RegP.
5380 // The output of an instruction is controlled by the allocator, which respects
5381 // register class masks, not match rules.  Unless an instruction mentions
5382 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5383 // by the allocator as an output.
5384 // The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
5385 // RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5386 // result, RBP is not included in the output of the instruction either.
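//
// A minimal sketch (not an actual rule in this file): an instruction definition
// such as
//   instruct examplePtrMove(rRegP dst, rRegP src) %{ ... %}
// can safely name rRegP as its output; the allocator picks dst from ptr_reg
// only, so neither R15 nor (with PreserveFramePointer) RBP is ever assigned,
// even though both would match as rRegP inputs.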
 5387 
 5388 // This operand is not allowed to use RBP even if
 5389 // RBP is not used to hold the frame pointer.
 5390 operand no_rbp_RegP()
 5391 %{
 5392   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5393   match(RegP);
 5394   match(rbx_RegP);
 5395   match(rsi_RegP);
 5396   match(rdi_RegP);
 5397 
 5398   format %{ %}
 5399   interface(REG_INTER);
 5400 %}
 5401 
 5402 // Special Registers
 5403 // Return a pointer value
 5404 operand rax_RegP()
 5405 %{
 5406   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5407   match(RegP);
 5408   match(rRegP);
 5409 
 5410   format %{ %}
 5411   interface(REG_INTER);
 5412 %}
 5413 
 5414 // Special Registers
 5415 // Return a compressed pointer value
 5416 operand rax_RegN()
 5417 %{
 5418   constraint(ALLOC_IN_RC(int_rax_reg));
 5419   match(RegN);
 5420   match(rRegN);
 5421 
 5422   format %{ %}
 5423   interface(REG_INTER);
 5424 %}
 5425 
 5426 // Used in AtomicAdd
 5427 operand rbx_RegP()
 5428 %{
 5429   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5430   match(RegP);
 5431   match(rRegP);
 5432 
 5433   format %{ %}
 5434   interface(REG_INTER);
 5435 %}
 5436 
 5437 operand rsi_RegP()
 5438 %{
 5439   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5440   match(RegP);
 5441   match(rRegP);
 5442 
 5443   format %{ %}
 5444   interface(REG_INTER);
 5445 %}
 5446 
 5447 operand rbp_RegP()
 5448 %{
 5449   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5450   match(RegP);
 5451   match(rRegP);
 5452 
 5453   format %{ %}
 5454   interface(REG_INTER);
 5455 %}
 5456 
 5457 // Used in rep stosq
 5458 operand rdi_RegP()
 5459 %{
 5460   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5461   match(RegP);
 5462   match(rRegP);
 5463 
 5464   format %{ %}
 5465   interface(REG_INTER);
 5466 %}
 5467 
 5468 operand r15_RegP()
 5469 %{
 5470   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5471   match(RegP);
 5472   match(rRegP);
 5473 
 5474   format %{ %}
 5475   interface(REG_INTER);
 5476 %}
 5477 
 5478 operand rRegL()
 5479 %{
 5480   constraint(ALLOC_IN_RC(long_reg));
 5481   match(RegL);
 5482   match(rax_RegL);
 5483   match(rdx_RegL);
 5484 
 5485   format %{ %}
 5486   interface(REG_INTER);
 5487 %}
 5488 
 5489 // Special Registers
 5490 operand no_rax_rdx_RegL()
 5491 %{
 5492   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5493   match(RegL);
 5494   match(rRegL);
 5495 
 5496   format %{ %}
 5497   interface(REG_INTER);
 5498 %}
 5499 
 5500 operand rax_RegL()
 5501 %{
 5502   constraint(ALLOC_IN_RC(long_rax_reg));
 5503   match(RegL);
 5504   match(rRegL);
 5505 
 5506   format %{ "RAX" %}
 5507   interface(REG_INTER);
 5508 %}
 5509 
 5510 operand rcx_RegL()
 5511 %{
 5512   constraint(ALLOC_IN_RC(long_rcx_reg));
 5513   match(RegL);
 5514   match(rRegL);
 5515 
 5516   format %{ %}
 5517   interface(REG_INTER);
 5518 %}
 5519 
 5520 operand rdx_RegL()
 5521 %{
 5522   constraint(ALLOC_IN_RC(long_rdx_reg));
 5523   match(RegL);
 5524   match(rRegL);
 5525 
 5526   format %{ %}
 5527   interface(REG_INTER);
 5528 %}
 5529 
 5530 operand r11_RegL()
 5531 %{
 5532   constraint(ALLOC_IN_RC(long_r11_reg));
 5533   match(RegL);
 5534   match(rRegL);
 5535 
 5536   format %{ %}
 5537   interface(REG_INTER);
 5538 %}
 5539 
 5540 operand no_rbp_r13_RegL()
 5541 %{
 5542   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5543   match(RegL);
 5544   match(rRegL);
 5545   match(rax_RegL);
 5546   match(rcx_RegL);
 5547   match(rdx_RegL);
 5548 
 5549   format %{ %}
 5550   interface(REG_INTER);
 5551 %}
 5552 
 5553 // Flags register, used as output of compare instructions
 5554 operand rFlagsReg()
 5555 %{
 5556   constraint(ALLOC_IN_RC(int_flags));
 5557   match(RegFlags);
 5558 
 5559   format %{ "RFLAGS" %}
 5560   interface(REG_INTER);
 5561 %}
 5562 
 5563 // Flags register, used as output of FLOATING POINT compare instructions
 5564 operand rFlagsRegU()
 5565 %{
 5566   constraint(ALLOC_IN_RC(int_flags));
 5567   match(RegFlags);
 5568 
 5569   format %{ "RFLAGS_U" %}
 5570   interface(REG_INTER);
 5571 %}
 5572 
 5573 operand rFlagsRegUCF() %{
 5574   constraint(ALLOC_IN_RC(int_flags));
 5575   match(RegFlags);
 5576   predicate(false);
 5577 
 5578   format %{ "RFLAGS_U_CF" %}
 5579   interface(REG_INTER);
 5580 %}
 5581 
 5582 // Float register operands
 5583 operand regF() %{
 5584    constraint(ALLOC_IN_RC(float_reg));
 5585    match(RegF);
 5586 
 5587    format %{ %}
 5588    interface(REG_INTER);
 5589 %}
 5590 
 5591 // Float register operands
 5592 operand legRegF() %{
 5593    constraint(ALLOC_IN_RC(float_reg_legacy));
 5594    match(RegF);
 5595 
 5596    format %{ %}
 5597    interface(REG_INTER);
 5598 %}
 5599 
 5600 // Float register operands
 5601 operand vlRegF() %{
 5602    constraint(ALLOC_IN_RC(float_reg_vl));
 5603    match(RegF);
 5604 
 5605    format %{ %}
 5606    interface(REG_INTER);
 5607 %}
 5608 
 5609 // Double register operands
 5610 operand regD() %{
 5611    constraint(ALLOC_IN_RC(double_reg));
 5612    match(RegD);
 5613 
 5614    format %{ %}
 5615    interface(REG_INTER);
 5616 %}
 5617 
 5618 // Double register operands
 5619 operand legRegD() %{
 5620    constraint(ALLOC_IN_RC(double_reg_legacy));
 5621    match(RegD);
 5622 
 5623    format %{ %}
 5624    interface(REG_INTER);
 5625 %}
 5626 
 5627 // Double register operands
 5628 operand vlRegD() %{
 5629    constraint(ALLOC_IN_RC(double_reg_vl));
 5630    match(RegD);
 5631 
 5632    format %{ %}
 5633    interface(REG_INTER);
 5634 %}
 5635 
 5636 //----------Memory Operands----------------------------------------------------
 5637 // Direct Memory Operand
 5638 // operand direct(immP addr)
 5639 // %{
 5640 //   match(addr);
 5641 
 5642 //   format %{ "[$addr]" %}
 5643 //   interface(MEMORY_INTER) %{
 5644 //     base(0xFFFFFFFF);
 5645 //     index(0x4);
 5646 //     scale(0x0);
 5647 //     disp($addr);
 5648 //   %}
 5649 // %}
 5650 
 5651 // Indirect Memory Operand
 5652 operand indirect(any_RegP reg)
 5653 %{
 5654   constraint(ALLOC_IN_RC(ptr_reg));
 5655   match(reg);
 5656 
 5657   format %{ "[$reg]" %}
 5658   interface(MEMORY_INTER) %{
 5659     base($reg);
 5660     index(0x4);
 5661     scale(0x0);
 5662     disp(0x0);
 5663   %}
 5664 %}
 5665 
 5666 // Indirect Memory Plus Short Offset Operand
 5667 operand indOffset8(any_RegP reg, immL8 off)
 5668 %{
 5669   constraint(ALLOC_IN_RC(ptr_reg));
 5670   match(AddP reg off);
 5671 
 5672   format %{ "[$reg + $off (8-bit)]" %}
 5673   interface(MEMORY_INTER) %{
 5674     base($reg);
 5675     index(0x4);
 5676     scale(0x0);
 5677     disp($off);
 5678   %}
 5679 %}
 5680 
 5681 // Indirect Memory Plus Long Offset Operand
 5682 operand indOffset32(any_RegP reg, immL32 off)
 5683 %{
 5684   constraint(ALLOC_IN_RC(ptr_reg));
 5685   match(AddP reg off);
 5686 
 5687   format %{ "[$reg + $off (32-bit)]" %}
 5688   interface(MEMORY_INTER) %{
 5689     base($reg);
 5690     index(0x4);
 5691     scale(0x0);
 5692     disp($off);
 5693   %}
 5694 %}
 5695 
 5696 // Indirect Memory Plus Index Register Plus Offset Operand
 5697 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5698 %{
 5699   constraint(ALLOC_IN_RC(ptr_reg));
 5700   match(AddP (AddP reg lreg) off);
 5701 
 5702   op_cost(10);
 5703   format %{"[$reg + $off + $lreg]" %}
 5704   interface(MEMORY_INTER) %{
 5705     base($reg);
 5706     index($lreg);
 5707     scale(0x0);
 5708     disp($off);
 5709   %}
 5710 %}
 5711 
 5712 // Indirect Memory Plus Index Register Plus Offset Operand
 5713 operand indIndex(any_RegP reg, rRegL lreg)
 5714 %{
 5715   constraint(ALLOC_IN_RC(ptr_reg));
 5716   match(AddP reg lreg);
 5717 
 5718   op_cost(10);
 5719   format %{"[$reg + $lreg]" %}
 5720   interface(MEMORY_INTER) %{
 5721     base($reg);
 5722     index($lreg);
 5723     scale(0x0);
 5724     disp(0x0);
 5725   %}
 5726 %}
 5727 
 5728 // Indirect Memory Times Scale Plus Index Register
 5729 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5730 %{
 5731   constraint(ALLOC_IN_RC(ptr_reg));
 5732   match(AddP reg (LShiftL lreg scale));
 5733 
 5734   op_cost(10);
 5735   format %{"[$reg + $lreg << $scale]" %}
 5736   interface(MEMORY_INTER) %{
 5737     base($reg);
 5738     index($lreg);
 5739     scale($scale);
 5740     disp(0x0);
 5741   %}
 5742 %}
 5743 
 5744 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5745 %{
 5746   constraint(ALLOC_IN_RC(ptr_reg));
 5747   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5748   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5749 
 5750   op_cost(10);
 5751   format %{"[$reg + pos $idx << $scale]" %}
 5752   interface(MEMORY_INTER) %{
 5753     base($reg);
 5754     index($idx);
 5755     scale($scale);
 5756     disp(0x0);
 5757   %}
 5758 %}
 5759 
 5760 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5761 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5762 %{
 5763   constraint(ALLOC_IN_RC(ptr_reg));
 5764   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5765 
 5766   op_cost(10);
 5767   format %{"[$reg + $off + $lreg << $scale]" %}
 5768   interface(MEMORY_INTER) %{
 5769     base($reg);
 5770     index($lreg);
 5771     scale($scale);
 5772     disp($off);
 5773   %}
 5774 %}
 5775 
 5776 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5777 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5778 %{
 5779   constraint(ALLOC_IN_RC(ptr_reg));
 5780   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5781   match(AddP (AddP reg (ConvI2L idx)) off);
 5782 
 5783   op_cost(10);
 5784   format %{"[$reg + $off + $idx]" %}
 5785   interface(MEMORY_INTER) %{
 5786     base($reg);
 5787     index($idx);
 5788     scale(0x0);
 5789     disp($off);
 5790   %}
 5791 %}
 5792 
 5793 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5794 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5795 %{
 5796   constraint(ALLOC_IN_RC(ptr_reg));
 5797   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5798   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5799 
 5800   op_cost(10);
 5801   format %{"[$reg + $off + $idx << $scale]" %}
 5802   interface(MEMORY_INTER) %{
 5803     base($reg);
 5804     index($idx);
 5805     scale($scale);
 5806     disp($off);
 5807   %}
 5808 %}
 5809 
 5810 // Indirect Narrow Oop Operand
 5811 operand indCompressedOop(rRegN reg) %{
 5812   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5813   constraint(ALLOC_IN_RC(ptr_reg));
 5814   match(DecodeN reg);
 5815 
 5816   op_cost(10);
 5817   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5818   interface(MEMORY_INTER) %{
 5819     base(0xc); // R12
 5820     index($reg);
 5821     scale(0x3);
 5822     disp(0x0);
 5823   %}
 5824 %}
 5825 
 5826 // Indirect Narrow Oop Plus Offset Operand
5827 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
5828 // so we can't free r12 even with CompressedOops::base() == nullptr.
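// For example, with shift() == 3 a narrow oop n decodes to R12 + (n << 3), so
// the operand below folds the DecodeN and the 32-bit offset into a single
// base + index*8 + disp addressing mode.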
 5829 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5830   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5831   constraint(ALLOC_IN_RC(ptr_reg));
 5832   match(AddP (DecodeN reg) off);
 5833 
 5834   op_cost(10);
 5835   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5836   interface(MEMORY_INTER) %{
 5837     base(0xc); // R12
 5838     index($reg);
 5839     scale(0x3);
 5840     disp($off);
 5841   %}
 5842 %}
 5843 
 5844 // Indirect Memory Operand
 5845 operand indirectNarrow(rRegN reg)
 5846 %{
 5847   predicate(CompressedOops::shift() == 0);
 5848   constraint(ALLOC_IN_RC(ptr_reg));
 5849   match(DecodeN reg);
 5850 
 5851   format %{ "[$reg]" %}
 5852   interface(MEMORY_INTER) %{
 5853     base($reg);
 5854     index(0x4);
 5855     scale(0x0);
 5856     disp(0x0);
 5857   %}
 5858 %}
 5859 
 5860 // Indirect Memory Plus Short Offset Operand
 5861 operand indOffset8Narrow(rRegN reg, immL8 off)
 5862 %{
 5863   predicate(CompressedOops::shift() == 0);
 5864   constraint(ALLOC_IN_RC(ptr_reg));
 5865   match(AddP (DecodeN reg) off);
 5866 
 5867   format %{ "[$reg + $off (8-bit)]" %}
 5868   interface(MEMORY_INTER) %{
 5869     base($reg);
 5870     index(0x4);
 5871     scale(0x0);
 5872     disp($off);
 5873   %}
 5874 %}
 5875 
 5876 // Indirect Memory Plus Long Offset Operand
 5877 operand indOffset32Narrow(rRegN reg, immL32 off)
 5878 %{
 5879   predicate(CompressedOops::shift() == 0);
 5880   constraint(ALLOC_IN_RC(ptr_reg));
 5881   match(AddP (DecodeN reg) off);
 5882 
 5883   format %{ "[$reg + $off (32-bit)]" %}
 5884   interface(MEMORY_INTER) %{
 5885     base($reg);
 5886     index(0x4);
 5887     scale(0x0);
 5888     disp($off);
 5889   %}
 5890 %}
 5891 
 5892 // Indirect Memory Plus Index Register Plus Offset Operand
 5893 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5894 %{
 5895   predicate(CompressedOops::shift() == 0);
 5896   constraint(ALLOC_IN_RC(ptr_reg));
 5897   match(AddP (AddP (DecodeN reg) lreg) off);
 5898 
 5899   op_cost(10);
 5900   format %{"[$reg + $off + $lreg]" %}
 5901   interface(MEMORY_INTER) %{
 5902     base($reg);
 5903     index($lreg);
 5904     scale(0x0);
 5905     disp($off);
 5906   %}
 5907 %}
 5908 
 5909 // Indirect Memory Plus Index Register Plus Offset Operand
 5910 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5911 %{
 5912   predicate(CompressedOops::shift() == 0);
 5913   constraint(ALLOC_IN_RC(ptr_reg));
 5914   match(AddP (DecodeN reg) lreg);
 5915 
 5916   op_cost(10);
 5917   format %{"[$reg + $lreg]" %}
 5918   interface(MEMORY_INTER) %{
 5919     base($reg);
 5920     index($lreg);
 5921     scale(0x0);
 5922     disp(0x0);
 5923   %}
 5924 %}
 5925 
 5926 // Indirect Memory Times Scale Plus Index Register
 5927 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5928 %{
 5929   predicate(CompressedOops::shift() == 0);
 5930   constraint(ALLOC_IN_RC(ptr_reg));
 5931   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5932 
 5933   op_cost(10);
 5934   format %{"[$reg + $lreg << $scale]" %}
 5935   interface(MEMORY_INTER) %{
 5936     base($reg);
 5937     index($lreg);
 5938     scale($scale);
 5939     disp(0x0);
 5940   %}
 5941 %}
 5942 
 5943 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5944 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5945 %{
 5946   predicate(CompressedOops::shift() == 0);
 5947   constraint(ALLOC_IN_RC(ptr_reg));
 5948   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5949 
 5950   op_cost(10);
 5951   format %{"[$reg + $off + $lreg << $scale]" %}
 5952   interface(MEMORY_INTER) %{
 5953     base($reg);
 5954     index($lreg);
 5955     scale($scale);
 5956     disp($off);
 5957   %}
 5958 %}
 5959 
5960 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5961 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5962 %{
 5963   constraint(ALLOC_IN_RC(ptr_reg));
 5964   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5965   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5966 
 5967   op_cost(10);
 5968   format %{"[$reg + $off + $idx]" %}
 5969   interface(MEMORY_INTER) %{
 5970     base($reg);
 5971     index($idx);
 5972     scale(0x0);
 5973     disp($off);
 5974   %}
 5975 %}
 5976 
 5977 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5978 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5979 %{
 5980   constraint(ALLOC_IN_RC(ptr_reg));
 5981   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5982   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5983 
 5984   op_cost(10);
 5985   format %{"[$reg + $off + $idx << $scale]" %}
 5986   interface(MEMORY_INTER) %{
 5987     base($reg);
 5988     index($idx);
 5989     scale($scale);
 5990     disp($off);
 5991   %}
 5992 %}
 5993 
 5994 //----------Special Memory Operands--------------------------------------------
 5995 // Stack Slot Operand - This operand is used for loading and storing temporary
 5996 //                      values on the stack where a match requires a value to
 5997 //                      flow through memory.
 5998 operand stackSlotP(sRegP reg)
 5999 %{
 6000   constraint(ALLOC_IN_RC(stack_slots));
 6001   // No match rule because this operand is only generated in matching
 6002 
 6003   format %{ "[$reg]" %}
 6004   interface(MEMORY_INTER) %{
 6005     base(0x4);   // RSP
 6006     index(0x4);  // No Index
 6007     scale(0x0);  // No Scale
 6008     disp($reg);  // Stack Offset
 6009   %}
 6010 %}
 6011 
 6012 operand stackSlotI(sRegI reg)
 6013 %{
 6014   constraint(ALLOC_IN_RC(stack_slots));
 6015   // No match rule because this operand is only generated in matching
 6016 
 6017   format %{ "[$reg]" %}
 6018   interface(MEMORY_INTER) %{
 6019     base(0x4);   // RSP
 6020     index(0x4);  // No Index
 6021     scale(0x0);  // No Scale
 6022     disp($reg);  // Stack Offset
 6023   %}
 6024 %}
 6025 
 6026 operand stackSlotF(sRegF reg)
 6027 %{
 6028   constraint(ALLOC_IN_RC(stack_slots));
 6029   // No match rule because this operand is only generated in matching
 6030 
 6031   format %{ "[$reg]" %}
 6032   interface(MEMORY_INTER) %{
 6033     base(0x4);   // RSP
 6034     index(0x4);  // No Index
 6035     scale(0x0);  // No Scale
 6036     disp($reg);  // Stack Offset
 6037   %}
 6038 %}
 6039 
 6040 operand stackSlotD(sRegD reg)
 6041 %{
 6042   constraint(ALLOC_IN_RC(stack_slots));
 6043   // No match rule because this operand is only generated in matching
 6044 
 6045   format %{ "[$reg]" %}
 6046   interface(MEMORY_INTER) %{
 6047     base(0x4);   // RSP
 6048     index(0x4);  // No Index
 6049     scale(0x0);  // No Scale
 6050     disp($reg);  // Stack Offset
 6051   %}
 6052 %}

6053 operand stackSlotL(sRegL reg)
 6054 %{
 6055   constraint(ALLOC_IN_RC(stack_slots));
 6056   // No match rule because this operand is only generated in matching
 6057 
 6058   format %{ "[$reg]" %}
 6059   interface(MEMORY_INTER) %{
 6060     base(0x4);   // RSP
 6061     index(0x4);  // No Index
 6062     scale(0x0);  // No Scale
 6063     disp($reg);  // Stack Offset
 6064   %}
 6065 %}
 6066 
 6067 //----------Conditional Branch Operands----------------------------------------
 6068 // Comparison Op  - This is the operation of the comparison, and is limited to
 6069 //                  the following set of codes:
 6070 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6071 //
 6072 // Other attributes of the comparison, such as unsignedness, are specified
 6073 // by the comparison instruction that sets a condition code flags register.
 6074 // That result is represented by a flags operand whose subtype is appropriate
 6075 // to the unsignedness (etc.) of the comparison.
 6076 //
 6077 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6078 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6079 // by matching a specific subtype of Bool operand below, such as cmpOpU.
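//
// For example (a sketch, not a rule in this file): a signed "<" test is a Bool
// with BoolTest::lt; it matches cmpOp below, whose less() entry supplies the
// encoding 0xC and the mnemonic suffix "l", so a conditional branch on that
// operand assembles as jl.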
 6080 
 6081 // Comparison Code
 6082 operand cmpOp()
 6083 %{
 6084   match(Bool);
 6085 
 6086   format %{ "" %}
 6087   interface(COND_INTER) %{
 6088     equal(0x4, "e");
 6089     not_equal(0x5, "ne");
 6090     less(0xC, "l");
 6091     greater_equal(0xD, "ge");
 6092     less_equal(0xE, "le");
 6093     greater(0xF, "g");
 6094     overflow(0x0, "o");
 6095     no_overflow(0x1, "no");
 6096   %}
 6097 %}
 6098 
 6099 // Comparison Code, unsigned compare.  Used by FP also, with
 6100 // C2 (unordered) turned into GT or LT already.  The other bits
 6101 // C0 and C3 are turned into Carry & Zero flags.
 6102 operand cmpOpU()
 6103 %{
 6104   match(Bool);
 6105 
 6106   format %{ "" %}
 6107   interface(COND_INTER) %{
 6108     equal(0x4, "e");
 6109     not_equal(0x5, "ne");
 6110     less(0x2, "b");
 6111     greater_equal(0x3, "ae");
 6112     less_equal(0x6, "be");
 6113     greater(0x7, "a");
 6114     overflow(0x0, "o");
 6115     no_overflow(0x1, "no");
 6116   %}
 6117 %}
 6118 
 6119 
6120 // Floating comparisons that don't require any fixup for the unordered case.
6121 // If both inputs of the comparison are the same, ZF is always set, so we
6122 // don't need to use cmpOpUCF2 for eq/ne.
 6123 operand cmpOpUCF() %{
 6124   match(Bool);
 6125   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 6126             n->as_Bool()->_test._test == BoolTest::ge ||
 6127             n->as_Bool()->_test._test == BoolTest::le ||
 6128             n->as_Bool()->_test._test == BoolTest::gt ||
 6129             n->in(1)->in(1) == n->in(1)->in(2));
 6130   format %{ "" %}
 6131   interface(COND_INTER) %{
 6132     equal(0xb, "np");
 6133     not_equal(0xa, "p");
 6134     less(0x2, "b");
 6135     greater_equal(0x3, "ae");
 6136     less_equal(0x6, "be");
 6137     greater(0x7, "a");
 6138     overflow(0x0, "o");
 6139     no_overflow(0x1, "no");
 6140   %}
 6141 %}
 6142 
 6143 
 6144 // Floating comparisons that can be fixed up with extra conditional jumps
 6145 operand cmpOpUCF2() %{
 6146   match(Bool);
 6147   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 6148              n->as_Bool()->_test._test == BoolTest::eq) &&
 6149             n->in(1)->in(1) != n->in(1)->in(2));
 6150   format %{ "" %}
 6151   interface(COND_INTER) %{
 6152     equal(0x4, "e");
 6153     not_equal(0x5, "ne");
 6154     less(0x2, "b");
 6155     greater_equal(0x3, "ae");
 6156     less_equal(0x6, "be");
 6157     greater(0x7, "a");
 6158     overflow(0x0, "o");
 6159     no_overflow(0x1, "no");
 6160   %}
 6161 %}
 6162 
6163 // Operands for bound floating point register arguments
 6164 operand rxmm0() %{
 6165   constraint(ALLOC_IN_RC(xmm0_reg));
 6166   match(VecX);
6167   format %{ %}
 6168   interface(REG_INTER);
 6169 %}
 6170 
 6171 // Vectors
 6172 
 6173 // Dummy generic vector class. Should be used for all vector operands.
 6174 // Replaced with vec[SDXYZ] during post-selection pass.
 6175 operand vec() %{
 6176   constraint(ALLOC_IN_RC(dynamic));
 6177   match(VecX);
 6178   match(VecY);
 6179   match(VecZ);
 6180   match(VecS);
 6181   match(VecD);
 6182 
 6183   format %{ %}
 6184   interface(REG_INTER);
 6185 %}
 6186 
 6187 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6188 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6189 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6190 // runtime code generation via reg_class_dynamic.
 6191 operand legVec() %{
 6192   constraint(ALLOC_IN_RC(dynamic));
 6193   match(VecX);
 6194   match(VecY);
 6195   match(VecZ);
 6196   match(VecS);
 6197   match(VecD);
 6198 
 6199   format %{ %}
 6200   interface(REG_INTER);
 6201 %}
 6202 
 6203 // Replaces vec during post-selection cleanup. See above.
 6204 operand vecS() %{
 6205   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6206   match(VecS);
 6207 
 6208   format %{ %}
 6209   interface(REG_INTER);
 6210 %}
 6211 
 6212 // Replaces legVec during post-selection cleanup. See above.
 6213 operand legVecS() %{
 6214   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6215   match(VecS);
 6216 
 6217   format %{ %}
 6218   interface(REG_INTER);
 6219 %}
 6220 
 6221 // Replaces vec during post-selection cleanup. See above.
 6222 operand vecD() %{
 6223   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6224   match(VecD);
 6225 
 6226   format %{ %}
 6227   interface(REG_INTER);
 6228 %}
 6229 
 6230 // Replaces legVec during post-selection cleanup. See above.
 6231 operand legVecD() %{
 6232   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6233   match(VecD);
 6234 
 6235   format %{ %}
 6236   interface(REG_INTER);
 6237 %}
 6238 
 6239 // Replaces vec during post-selection cleanup. See above.
 6240 operand vecX() %{
 6241   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6242   match(VecX);
 6243 
 6244   format %{ %}
 6245   interface(REG_INTER);
 6246 %}
 6247 
 6248 // Replaces legVec during post-selection cleanup. See above.
 6249 operand legVecX() %{
 6250   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6251   match(VecX);
 6252 
 6253   format %{ %}
 6254   interface(REG_INTER);
 6255 %}
 6256 
 6257 // Replaces vec during post-selection cleanup. See above.
 6258 operand vecY() %{
 6259   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6260   match(VecY);
 6261 
 6262   format %{ %}
 6263   interface(REG_INTER);
 6264 %}
 6265 
 6266 // Replaces legVec during post-selection cleanup. See above.
 6267 operand legVecY() %{
 6268   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6269   match(VecY);
 6270 
 6271   format %{ %}
 6272   interface(REG_INTER);
 6273 %}
 6274 
 6275 // Replaces vec during post-selection cleanup. See above.
 6276 operand vecZ() %{
 6277   constraint(ALLOC_IN_RC(vectorz_reg));
 6278   match(VecZ);
 6279 
 6280   format %{ %}
 6281   interface(REG_INTER);
 6282 %}
 6283 
 6284 // Replaces legVec during post-selection cleanup. See above.
 6285 operand legVecZ() %{
 6286   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6287   match(VecZ);
 6288 
 6289   format %{ %}
 6290   interface(REG_INTER);
 6291 %}
 6292 
 6293 //----------OPERAND CLASSES----------------------------------------------------
6294 // Operand Classes are groups of operands that are used to simplify
 6295 // instruction definitions by not requiring the AD writer to specify separate
 6296 // instructions for every form of operand when the instruction accepts
 6297 // multiple operand types with the same basic encoding and format.  The classic
 6298 // case of this is memory operands.
 6299 
 6300 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6301                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6302                indCompressedOop, indCompressedOopOffset,
 6303                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6304                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6305                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
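
// For example, loadB further below takes a single "memory" operand, so one
// movsbl rule covers [reg], [reg + off], and scaled-index addressing alike;
// without the opclass each addressing form would need its own instruct.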
 6306 
 6307 //----------PIPELINE-----------------------------------------------------------
6308 // Rules which define the behavior of the target architecture's pipeline.
 6309 pipeline %{
 6310 
 6311 //----------ATTRIBUTES---------------------------------------------------------
 6312 attributes %{
6313   variable_size_instructions;        // Variable size instructions
6314   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
6315   instruction_unit_size = 1;         // An instruction is 1 byte long
 6316   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6317   instruction_fetch_units = 1;       // of 16 bytes
 6318 %}
 6319 
 6320 //----------RESOURCES----------------------------------------------------------
 6321 // Resources are the functional units available to the machine
 6322 
 6323 // Generic P2/P3 pipeline
 6324 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6325 // 3 instructions decoded per cycle.
 6326 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6327 // 3 ALU ops, only ALU0 handles mul instructions.
 6328 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6329            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6330            BR, FPU,
 6331            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
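
// For example, a pipe_class claiming "DECODE : S0" may use any of D0-D2 in
// stage S0, while one claiming "D0 : S0" must get the big decoder itself.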
 6332 
 6333 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6334 // Pipeline Description specifies the stages in the machine's pipeline
 6335 
 6336 // Generic P2/P3 pipeline
 6337 pipe_desc(S0, S1, S2, S3, S4, S5);
 6338 
 6339 //----------PIPELINE CLASSES---------------------------------------------------
 6340 // Pipeline Classes describe the stages in which input and output are
 6341 // referenced by the hardware pipeline.
 6342 
 6343 // Naming convention: ialu or fpu
 6344 // Then: _reg
 6345 // Then: _reg if there is a 2nd register
 6346 // Then: _long if it's a pair of instructions implementing a long
 6347 // Then: _fat if it requires the big decoder
 6348 //   Or: _mem if it requires the big decoder and a memory unit.
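//
// Reading ialu_reg_mem below by this convention: an integer ALU operation with
// a register destination and a memory input, which therefore needs the big
// decoder (D0) and a memory unit in addition to an ALU.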
 6349 
 6350 // Integer ALU reg operation
 6351 pipe_class ialu_reg(rRegI dst)
 6352 %{
 6353     single_instruction;
 6354     dst    : S4(write);
 6355     dst    : S3(read);
 6356     DECODE : S0;        // any decoder
 6357     ALU    : S3;        // any alu
 6358 %}
 6359 
 6360 // Long ALU reg operation
 6361 pipe_class ialu_reg_long(rRegL dst)
 6362 %{
 6363     instruction_count(2);
 6364     dst    : S4(write);
 6365     dst    : S3(read);
 6366     DECODE : S0(2);     // any 2 decoders
 6367     ALU    : S3(2);     // both alus
 6368 %}
 6369 
 6370 // Integer ALU reg operation using big decoder
 6371 pipe_class ialu_reg_fat(rRegI dst)
 6372 %{
 6373     single_instruction;
 6374     dst    : S4(write);
 6375     dst    : S3(read);
 6376     D0     : S0;        // big decoder only
 6377     ALU    : S3;        // any alu
 6378 %}
 6379 
 6380 // Integer ALU reg-reg operation
 6381 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6382 %{
 6383     single_instruction;
 6384     dst    : S4(write);
 6385     src    : S3(read);
 6386     DECODE : S0;        // any decoder
 6387     ALU    : S3;        // any alu
 6388 %}
 6389 
6390 // Integer ALU reg-reg operation using big decoder
 6391 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6392 %{
 6393     single_instruction;
 6394     dst    : S4(write);
 6395     src    : S3(read);
 6396     D0     : S0;        // big decoder only
 6397     ALU    : S3;        // any alu
 6398 %}
 6399 
 6400 // Integer ALU reg-mem operation
 6401 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6402 %{
 6403     single_instruction;
 6404     dst    : S5(write);
 6405     mem    : S3(read);
 6406     D0     : S0;        // big decoder only
 6407     ALU    : S4;        // any alu
 6408     MEM    : S3;        // any mem
 6409 %}
 6410 
 6411 // Integer mem operation (prefetch)
 6412 pipe_class ialu_mem(memory mem)
 6413 %{
 6414     single_instruction;
 6415     mem    : S3(read);
 6416     D0     : S0;        // big decoder only
 6417     MEM    : S3;        // any mem
 6418 %}
 6419 
 6420 // Integer Store to Memory
 6421 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6422 %{
 6423     single_instruction;
 6424     mem    : S3(read);
 6425     src    : S5(read);
 6426     D0     : S0;        // big decoder only
 6427     ALU    : S4;        // any alu
 6428     MEM    : S3;
 6429 %}
 6430 
 6431 // // Long Store to Memory
 6432 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6433 // %{
 6434 //     instruction_count(2);
 6435 //     mem    : S3(read);
 6436 //     src    : S5(read);
 6437 //     D0     : S0(2);          // big decoder only; twice
 6438 //     ALU    : S4(2);     // any 2 alus
 6439 //     MEM    : S3(2);  // Both mems
 6440 // %}
 6441 
6442 // Integer Store of Immediate to Memory
 6443 pipe_class ialu_mem_imm(memory mem)
 6444 %{
 6445     single_instruction;
 6446     mem    : S3(read);
 6447     D0     : S0;        // big decoder only
 6448     ALU    : S4;        // any alu
 6449     MEM    : S3;
 6450 %}
 6451 
 6452 // Integer ALU0 reg-reg operation
 6453 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6454 %{
 6455     single_instruction;
 6456     dst    : S4(write);
 6457     src    : S3(read);
 6458     D0     : S0;        // Big decoder only
 6459     ALU0   : S3;        // only alu0
 6460 %}
 6461 
 6462 // Integer ALU0 reg-mem operation
 6463 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6464 %{
 6465     single_instruction;
 6466     dst    : S5(write);
 6467     mem    : S3(read);
 6468     D0     : S0;        // big decoder only
 6469     ALU0   : S4;        // ALU0 only
 6470     MEM    : S3;        // any mem
 6471 %}
 6472 
 6473 // Integer ALU reg-reg operation
 6474 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6475 %{
 6476     single_instruction;
 6477     cr     : S4(write);
 6478     src1   : S3(read);
 6479     src2   : S3(read);
 6480     DECODE : S0;        // any decoder
 6481     ALU    : S3;        // any alu
 6482 %}
 6483 
 6484 // Integer ALU reg-imm operation
 6485 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6486 %{
 6487     single_instruction;
 6488     cr     : S4(write);
 6489     src1   : S3(read);
 6490     DECODE : S0;        // any decoder
 6491     ALU    : S3;        // any alu
 6492 %}
 6493 
 6494 // Integer ALU reg-mem operation
 6495 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6496 %{
 6497     single_instruction;
 6498     cr     : S4(write);
 6499     src1   : S3(read);
 6500     src2   : S3(read);
 6501     D0     : S0;        // big decoder only
 6502     ALU    : S4;        // any alu
 6503     MEM    : S3;
 6504 %}
 6505 
 6506 // Conditional move reg-reg
 6507 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6508 %{
 6509     instruction_count(4);
 6510     y      : S4(read);
 6511     q      : S3(read);
 6512     p      : S3(read);
 6513     DECODE : S0(4);     // any decoder
 6514 %}
 6515 
 6516 // Conditional move reg-reg
 6517 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6518 %{
 6519     single_instruction;
 6520     dst    : S4(write);
 6521     src    : S3(read);
 6522     cr     : S3(read);
 6523     DECODE : S0;        // any decoder
 6524 %}
 6525 
 6526 // Conditional move reg-mem
 6527 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6528 %{
 6529     single_instruction;
 6530     dst    : S4(write);
 6531     src    : S3(read);
 6532     cr     : S3(read);
 6533     DECODE : S0;        // any decoder
 6534     MEM    : S3;
 6535 %}
 6536 
 6537 // Conditional move reg-reg long
 6538 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6539 %{
 6540     single_instruction;
 6541     dst    : S4(write);
 6542     src    : S3(read);
 6543     cr     : S3(read);
 6544     DECODE : S0(2);     // any 2 decoders
 6545 %}
 6546 
6547 // Float reg operation
 6548 pipe_class fpu_reg(regD dst)
 6549 %{
 6550     instruction_count(2);
 6551     dst    : S3(read);
 6552     DECODE : S0(2);     // any 2 decoders
 6553     FPU    : S3;
 6554 %}
 6555 
 6556 // Float reg-reg operation
 6557 pipe_class fpu_reg_reg(regD dst, regD src)
 6558 %{
 6559     instruction_count(2);
 6560     dst    : S4(write);
 6561     src    : S3(read);
 6562     DECODE : S0(2);     // any 2 decoders
 6563     FPU    : S3;
 6564 %}
 6565 
 6566 // Float reg-reg operation
 6567 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6568 %{
 6569     instruction_count(3);
 6570     dst    : S4(write);
 6571     src1   : S3(read);
 6572     src2   : S3(read);
 6573     DECODE : S0(3);     // any 3 decoders
 6574     FPU    : S3(2);
 6575 %}
 6576 
 6577 // Float reg-reg operation
 6578 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6579 %{
 6580     instruction_count(4);
 6581     dst    : S4(write);
 6582     src1   : S3(read);
 6583     src2   : S3(read);
 6584     src3   : S3(read);
6585     DECODE : S0(4);     // any 4 decoders
 6586     FPU    : S3(2);
 6587 %}
 6588 
 6589 // Float reg-reg operation
 6590 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6591 %{
 6592     instruction_count(4);
 6593     dst    : S4(write);
 6594     src1   : S3(read);
 6595     src2   : S3(read);
 6596     src3   : S3(read);
 6597     DECODE : S1(3);     // any 3 decoders
 6598     D0     : S0;        // Big decoder only
 6599     FPU    : S3(2);
 6600     MEM    : S3;
 6601 %}
 6602 
 6603 // Float reg-mem operation
 6604 pipe_class fpu_reg_mem(regD dst, memory mem)
 6605 %{
 6606     instruction_count(2);
 6607     dst    : S5(write);
 6608     mem    : S3(read);
 6609     D0     : S0;        // big decoder only
 6610     DECODE : S1;        // any decoder for FPU POP
 6611     FPU    : S4;
 6612     MEM    : S3;        // any mem
 6613 %}
 6614 
 6615 // Float reg-mem operation
 6616 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6617 %{
 6618     instruction_count(3);
 6619     dst    : S5(write);
 6620     src1   : S3(read);
 6621     mem    : S3(read);
 6622     D0     : S0;        // big decoder only
 6623     DECODE : S1(2);     // any decoder for FPU POP
 6624     FPU    : S4;
 6625     MEM    : S3;        // any mem
 6626 %}
 6627 
 6628 // Float mem-reg operation
 6629 pipe_class fpu_mem_reg(memory mem, regD src)
 6630 %{
 6631     instruction_count(2);
 6632     src    : S5(read);
 6633     mem    : S3(read);
 6634     DECODE : S0;        // any decoder for FPU PUSH
 6635     D0     : S1;        // big decoder only
 6636     FPU    : S4;
 6637     MEM    : S3;        // any mem
 6638 %}
 6639 
 6640 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6641 %{
 6642     instruction_count(3);
 6643     src1   : S3(read);
 6644     src2   : S3(read);
 6645     mem    : S3(read);
 6646     DECODE : S0(2);     // any decoder for FPU PUSH
 6647     D0     : S1;        // big decoder only
 6648     FPU    : S4;
 6649     MEM    : S3;        // any mem
 6650 %}
 6651 
 6652 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6653 %{
 6654     instruction_count(3);
 6655     src1   : S3(read);
 6656     src2   : S3(read);
 6657     mem    : S4(read);
 6658     DECODE : S0;        // any decoder for FPU PUSH
 6659     D0     : S0(2);     // big decoder only
 6660     FPU    : S4;
 6661     MEM    : S3(2);     // any mem
 6662 %}
 6663 
 6664 pipe_class fpu_mem_mem(memory dst, memory src1)
 6665 %{
 6666     instruction_count(2);
 6667     src1   : S3(read);
 6668     dst    : S4(read);
 6669     D0     : S0(2);     // big decoder only
 6670     MEM    : S3(2);     // any mem
 6671 %}
 6672 
 6673 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6674 %{
 6675     instruction_count(3);
 6676     src1   : S3(read);
 6677     src2   : S3(read);
 6678     dst    : S4(read);
 6679     D0     : S0(3);     // big decoder only
 6680     FPU    : S4;
 6681     MEM    : S3(3);     // any mem
 6682 %}
 6683 
 6684 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6685 %{
 6686     instruction_count(3);
 6687     src1   : S4(read);
 6688     mem    : S4(read);
 6689     DECODE : S0;        // any decoder for FPU PUSH
 6690     D0     : S0(2);     // big decoder only
 6691     FPU    : S4;
 6692     MEM    : S3(2);     // any mem
 6693 %}
 6694 
 6695 // Float load constant
 6696 pipe_class fpu_reg_con(regD dst)
 6697 %{
 6698     instruction_count(2);
 6699     dst    : S5(write);
 6700     D0     : S0;        // big decoder only for the load
 6701     DECODE : S1;        // any decoder for FPU POP
 6702     FPU    : S4;
 6703     MEM    : S3;        // any mem
 6704 %}
 6705 
 6706 // Float load constant
 6707 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6708 %{
 6709     instruction_count(3);
 6710     dst    : S5(write);
 6711     src    : S3(read);
 6712     D0     : S0;        // big decoder only for the load
 6713     DECODE : S1(2);     // any decoder for FPU POP
 6714     FPU    : S4;
 6715     MEM    : S3;        // any mem
 6716 %}
 6717 
6718 // Unconditional branch
 6719 pipe_class pipe_jmp(label labl)
 6720 %{
 6721     single_instruction;
 6722     BR   : S3;
 6723 %}
 6724 
 6725 // Conditional branch
 6726 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6727 %{
 6728     single_instruction;
 6729     cr    : S1(read);
 6730     BR    : S3;
 6731 %}
 6732 
 6733 // Allocation idiom
 6734 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6735 %{
 6736     instruction_count(1); force_serialization;
 6737     fixed_latency(6);
 6738     heap_ptr : S3(read);
 6739     DECODE   : S0(3);
 6740     D0       : S2;
 6741     MEM      : S3;
 6742     ALU      : S3(2);
 6743     dst      : S5(write);
 6744     BR       : S5;
 6745 %}
 6746 
 6747 // Generic big/slow expanded idiom
 6748 pipe_class pipe_slow()
 6749 %{
 6750     instruction_count(10); multiple_bundles; force_serialization;
 6751     fixed_latency(100);
 6752     D0  : S0(2);
 6753     MEM : S3(2);
 6754 %}
 6755 
 6756 // The real do-nothing guy
 6757 pipe_class empty()
 6758 %{
 6759     instruction_count(0);
 6760 %}
 6761 
 6762 // Define the class for the Nop node
 6763 define
 6764 %{
 6765    MachNop = empty;
 6766 %}
 6767 
 6768 %}
 6769 
 6770 //----------INSTRUCTIONS-------------------------------------------------------
 6771 //
 6772 // match      -- States which machine-independent subtree may be replaced
 6773 //               by this instruction.
 6774 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6775 //               selection to identify a minimum cost tree of machine
 6776 //               instructions that matches a tree of machine-independent
 6777 //               instructions.
 6778 // format     -- A string providing the disassembly for this instruction.
 6779 //               The value of an instruction's operand may be inserted
 6780 //               by referring to it with a '$' prefix.
 6781 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6782 //               to within an encode class as $primary, $secondary, and $tertiary
6783 //               respectively.  The primary opcode is commonly used to
 6784 //               indicate the type of machine instruction, while secondary
 6785 //               and tertiary are often used for prefix options or addressing
 6786 //               modes.
 6787 // ins_encode -- A list of encode classes with parameters. The encode class
 6788 //               name must have been defined in an 'enc_class' specification
 6789 //               in the encode section of the architecture description.
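//
// For example, loadB below ties these pieces together: match selects the
// (Set dst (LoadB mem)) subtree, ins_cost(125) feeds instruction selection,
// format gives the disassembly with $dst and $mem substituted, and ins_encode
// emits movsbl (here via an inline encoding block rather than a named
// enc_class).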
 6790 
 6791 // ============================================================================
 6792 
 6793 instruct ShouldNotReachHere() %{
 6794   match(Halt);
 6795   format %{ "stop\t# ShouldNotReachHere" %}
 6796   ins_encode %{
 6797     if (is_reachable()) {
 6798       const char* str = __ code_string(_halt_reason);
 6799       __ stop(str);
 6800     }
 6801   %}
 6802   ins_pipe(pipe_slow);
 6803 %}
 6804 
 6805 // ============================================================================
 6806 
 6807 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6808 // Load Float
 6809 instruct MoveF2VL(vlRegF dst, regF src) %{
 6810   match(Set dst src);
 6811   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6812   ins_encode %{
 6813     ShouldNotReachHere();
 6814   %}
 6815   ins_pipe( fpu_reg_reg );
 6816 %}
 6817 
 6818 // Load Float
 6819 instruct MoveF2LEG(legRegF dst, regF src) %{
 6820   match(Set dst src);
 6821   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6822   ins_encode %{
 6823     ShouldNotReachHere();
 6824   %}
 6825   ins_pipe( fpu_reg_reg );
 6826 %}
 6827 
 6828 // Load Float
 6829 instruct MoveVL2F(regF dst, vlRegF src) %{
 6830   match(Set dst src);
 6831   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6832   ins_encode %{
 6833     ShouldNotReachHere();
 6834   %}
 6835   ins_pipe( fpu_reg_reg );
 6836 %}
 6837 
 6838 // Load Float
 6839 instruct MoveLEG2F(regF dst, legRegF src) %{
 6840   match(Set dst src);
 6841   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6842   ins_encode %{
 6843     ShouldNotReachHere();
 6844   %}
 6845   ins_pipe( fpu_reg_reg );
 6846 %}
 6847 
 6848 // Load Double
 6849 instruct MoveD2VL(vlRegD dst, regD src) %{
 6850   match(Set dst src);
 6851   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6852   ins_encode %{
 6853     ShouldNotReachHere();
 6854   %}
 6855   ins_pipe( fpu_reg_reg );
 6856 %}
 6857 
 6858 // Load Double
 6859 instruct MoveD2LEG(legRegD dst, regD src) %{
 6860   match(Set dst src);
 6861   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6862   ins_encode %{
 6863     ShouldNotReachHere();
 6864   %}
 6865   ins_pipe( fpu_reg_reg );
 6866 %}
 6867 
 6868 // Load Double
 6869 instruct MoveVL2D(regD dst, vlRegD src) %{
 6870   match(Set dst src);
 6871   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6872   ins_encode %{
 6873     ShouldNotReachHere();
 6874   %}
 6875   ins_pipe( fpu_reg_reg );
 6876 %}
 6877 
 6878 // Load Double
 6879 instruct MoveLEG2D(regD dst, legRegD src) %{
 6880   match(Set dst src);
 6881   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6882   ins_encode %{
 6883     ShouldNotReachHere();
 6884   %}
 6885   ins_pipe( fpu_reg_reg );
 6886 %}
 6887 
 6888 //----------Load/Store/Move Instructions---------------------------------------
 6889 //----------Load Instructions--------------------------------------------------
 6890 
 6891 // Load Byte (8 bit signed)
 6892 instruct loadB(rRegI dst, memory mem)
 6893 %{
 6894   match(Set dst (LoadB mem));
 6895 
 6896   ins_cost(125);
 6897   format %{ "movsbl  $dst, $mem\t# byte" %}
 6898 
 6899   ins_encode %{
 6900     __ movsbl($dst$$Register, $mem$$Address);
 6901   %}
 6902 
 6903   ins_pipe(ialu_reg_mem);
 6904 %}
 6905 
 6906 // Load Byte (8 bit signed) into Long Register
 6907 instruct loadB2L(rRegL dst, memory mem)
 6908 %{
 6909   match(Set dst (ConvI2L (LoadB mem)));
 6910 
 6911   ins_cost(125);
 6912   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6913 
 6914   ins_encode %{
 6915     __ movsbq($dst$$Register, $mem$$Address);
 6916   %}
 6917 
 6918   ins_pipe(ialu_reg_mem);
 6919 %}
 6920 
 6921 // Load Unsigned Byte (8 bit UNsigned)
 6922 instruct loadUB(rRegI dst, memory mem)
 6923 %{
 6924   match(Set dst (LoadUB mem));
 6925 
 6926   ins_cost(125);
 6927   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6928 
 6929   ins_encode %{
 6930     __ movzbl($dst$$Register, $mem$$Address);
 6931   %}
 6932 
 6933   ins_pipe(ialu_reg_mem);
 6934 %}
 6935 
 6936 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6937 instruct loadUB2L(rRegL dst, memory mem)
 6938 %{
 6939   match(Set dst (ConvI2L (LoadUB mem)));
 6940 
 6941   ins_cost(125);
 6942   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6943 
 6944   ins_encode %{
 6945     __ movzbq($dst$$Register, $mem$$Address);
 6946   %}
 6947 
 6948   ins_pipe(ialu_reg_mem);
 6949 %}
 6950 
 6951 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6952 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6953   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6954   effect(KILL cr);
 6955 
 6956   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6957             "andl    $dst, right_n_bits($mask, 8)" %}
 6958   ins_encode %{
 6959     Register Rdst = $dst$$Register;
 6960     __ movzbq(Rdst, $mem$$Address);
 6961     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6962   %}
 6963   ins_pipe(ialu_reg_mem);
 6964 %}
 6965 
 6966 // Load Short (16 bit signed)
 6967 instruct loadS(rRegI dst, memory mem)
 6968 %{
 6969   match(Set dst (LoadS mem));
 6970 
 6971   ins_cost(125);
 6972   format %{ "movswl $dst, $mem\t# short" %}
 6973 
 6974   ins_encode %{
 6975     __ movswl($dst$$Register, $mem$$Address);
 6976   %}
 6977 
 6978   ins_pipe(ialu_reg_mem);
 6979 %}
 6980 
 6981 // Load Short (16 bit signed) to Byte (8 bit signed)
 6982 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6983   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6984 
 6985   ins_cost(125);
 6986   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6987   ins_encode %{
 6988     __ movsbl($dst$$Register, $mem$$Address);
 6989   %}
 6990   ins_pipe(ialu_reg_mem);
 6991 %}
 6992 
 6993 // Load Short (16 bit signed) into Long Register
 6994 instruct loadS2L(rRegL dst, memory mem)
 6995 %{
 6996   match(Set dst (ConvI2L (LoadS mem)));
 6997 
 6998   ins_cost(125);
 6999   format %{ "movswq $dst, $mem\t# short -> long" %}
 7000 
 7001   ins_encode %{
 7002     __ movswq($dst$$Register, $mem$$Address);
 7003   %}
 7004 
 7005   ins_pipe(ialu_reg_mem);
 7006 %}
 7007 
 7008 // Load Unsigned Short/Char (16 bit UNsigned)
 7009 instruct loadUS(rRegI dst, memory mem)
 7010 %{
 7011   match(Set dst (LoadUS mem));
 7012 
 7013   ins_cost(125);
 7014   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7015 
 7016   ins_encode %{
 7017     __ movzwl($dst$$Register, $mem$$Address);
 7018   %}
 7019 
 7020   ins_pipe(ialu_reg_mem);
 7021 %}
 7022 
 7023 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7024 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7025   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7026 
 7027   ins_cost(125);
 7028   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7029   ins_encode %{
 7030     __ movsbl($dst$$Register, $mem$$Address);
 7031   %}
 7032   ins_pipe(ialu_reg_mem);
 7033 %}
 7034 
 7035 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7036 instruct loadUS2L(rRegL dst, memory mem)
 7037 %{
 7038   match(Set dst (ConvI2L (LoadUS mem)));
 7039 
 7040   ins_cost(125);
 7041   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7042 
 7043   ins_encode %{
 7044     __ movzwq($dst$$Register, $mem$$Address);
 7045   %}
 7046 
 7047   ins_pipe(ialu_reg_mem);
 7048 %}
 7049 
 7050 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7051 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7052   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7053 
 7054   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7055   ins_encode %{
 7056     __ movzbq($dst$$Register, $mem$$Address);
 7057   %}
 7058   ins_pipe(ialu_reg_mem);
 7059 %}
 7060 
 7061 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7062 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7063   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7064   effect(KILL cr);
 7065 
 7066   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7067             "andl    $dst, right_n_bits($mask, 16)" %}
 7068   ins_encode %{
 7069     Register Rdst = $dst$$Register;
 7070     __ movzwq(Rdst, $mem$$Address);
 7071     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7072   %}
 7073   ins_pipe(ialu_reg_mem);
 7074 %}
 7075 
 7076 // Load Integer
 7077 instruct loadI(rRegI dst, memory mem)
 7078 %{
 7079   match(Set dst (LoadI mem));
 7080 
 7081   ins_cost(125);
 7082   format %{ "movl    $dst, $mem\t# int" %}
 7083 
 7084   ins_encode %{
 7085     __ movl($dst$$Register, $mem$$Address);
 7086   %}
 7087 
 7088   ins_pipe(ialu_reg_mem);
 7089 %}
 7090 
 7091 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7092 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7093   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7094 
 7095   ins_cost(125);
 7096   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7097   ins_encode %{
 7098     __ movsbl($dst$$Register, $mem$$Address);
 7099   %}
 7100   ins_pipe(ialu_reg_mem);
 7101 %}
 7102 
 7103 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7104 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7105   match(Set dst (AndI (LoadI mem) mask));
 7106 
 7107   ins_cost(125);
 7108   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7109   ins_encode %{
 7110     __ movzbl($dst$$Register, $mem$$Address);
 7111   %}
 7112   ins_pipe(ialu_reg_mem);
 7113 %}
 7114 
 7115 // Load Integer (32 bit signed) to Short (16 bit signed)
 7116 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7117   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7118 
 7119   ins_cost(125);
 7120   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7121   ins_encode %{
 7122     __ movswl($dst$$Register, $mem$$Address);
 7123   %}
 7124   ins_pipe(ialu_reg_mem);
 7125 %}
 7126 
 7127 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7128 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7129   match(Set dst (AndI (LoadI mem) mask));
 7130 
 7131   ins_cost(125);
 7132   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7133   ins_encode %{
 7134     __ movzwl($dst$$Register, $mem$$Address);
 7135   %}
 7136   ins_pipe(ialu_reg_mem);
 7137 %}
 7138 
 7139 // Load Integer into Long Register
 7140 instruct loadI2L(rRegL dst, memory mem)
 7141 %{
 7142   match(Set dst (ConvI2L (LoadI mem)));
 7143 
 7144   ins_cost(125);
 7145   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7146 
 7147   ins_encode %{
 7148     __ movslq($dst$$Register, $mem$$Address);
 7149   %}
 7150 
 7151   ins_pipe(ialu_reg_mem);
 7152 %}
 7153 
 7154 // Load Integer with mask 0xFF into Long Register
 7155 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7156   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7157 
 7158   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7159   ins_encode %{
 7160     __ movzbq($dst$$Register, $mem$$Address);
 7161   %}
 7162   ins_pipe(ialu_reg_mem);
 7163 %}
 7164 
 7165 // Load Integer with mask 0xFFFF into Long Register
 7166 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7167   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7168 
 7169   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7170   ins_encode %{
 7171     __ movzwq($dst$$Register, $mem$$Address);
 7172   %}
 7173   ins_pipe(ialu_reg_mem);
 7174 %}
 7175 
 7176 // Load Integer with a 31-bit mask into Long Register
 7177 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7178   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7179   effect(KILL cr);
 7180 
 7181   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7182             "andl    $dst, $mask" %}
 7183   ins_encode %{
 7184     Register Rdst = $dst$$Register;
 7185     __ movl(Rdst, $mem$$Address);
 7186     __ andl(Rdst, $mask$$constant);
 7187   %}
 7188   ins_pipe(ialu_reg_mem);
 7189 %}
 7190 
 7191 // Load Unsigned Integer into Long Register
 7192 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7193 %{
 7194   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7195 
 7196   ins_cost(125);
 7197   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7198 
 7199   ins_encode %{
 7200     __ movl($dst$$Register, $mem$$Address);
 7201   %}
 7202 
 7203   ins_pipe(ialu_reg_mem);
 7204 %}
 7205 
 7206 // Load Long
 7207 instruct loadL(rRegL dst, memory mem)
 7208 %{
 7209   match(Set dst (LoadL mem));
 7210 
 7211   ins_cost(125);
 7212   format %{ "movq    $dst, $mem\t# long" %}
 7213 
 7214   ins_encode %{
 7215     __ movq($dst$$Register, $mem$$Address);
 7216   %}
 7217 
 7218   ins_pipe(ialu_reg_mem); // XXX
 7219 %}
 7220 
 7221 // Load Range
 7222 instruct loadRange(rRegI dst, memory mem)
 7223 %{
 7224   match(Set dst (LoadRange mem));
 7225 
 7226   ins_cost(125); // XXX
 7227   format %{ "movl    $dst, $mem\t# range" %}
 7228   ins_encode %{
 7229     __ movl($dst$$Register, $mem$$Address);
 7230   %}
 7231   ins_pipe(ialu_reg_mem);
 7232 %}
 7233 
 7234 // Load Pointer
 7235 instruct loadP(rRegP dst, memory mem)
 7236 %{
 7237   match(Set dst (LoadP mem));
 7238   predicate(n->as_Load()->barrier_data() == 0);
 7239 
 7240   ins_cost(125); // XXX
 7241   format %{ "movq    $dst, $mem\t# ptr" %}
 7242   ins_encode %{
 7243     __ movq($dst$$Register, $mem$$Address);
 7244   %}
 7245   ins_pipe(ialu_reg_mem); // XXX
 7246 %}
 7247 
 7248 // Load Compressed Pointer
 7249 instruct loadN(rRegN dst, memory mem)
 7250 %{
 7251    predicate(n->as_Load()->barrier_data() == 0);
 7252    match(Set dst (LoadN mem));
 7253 
 7254    ins_cost(125); // XXX
 7255    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7256    ins_encode %{
 7257      __ movl($dst$$Register, $mem$$Address);
 7258    %}
 7259    ins_pipe(ialu_reg_mem); // XXX
 7260 %}
 7261 
 7262 
 7263 // Load Klass Pointer
 7264 instruct loadKlass(rRegP dst, memory mem)
 7265 %{
 7266   match(Set dst (LoadKlass mem));
 7267 
 7268   ins_cost(125); // XXX
 7269   format %{ "movq    $dst, $mem\t# class" %}
 7270   ins_encode %{
 7271     __ movq($dst$$Register, $mem$$Address);
 7272   %}
 7273   ins_pipe(ialu_reg_mem); // XXX
 7274 %}
 7275 
 7276 // Load narrow Klass Pointer
 7277 instruct loadNKlass(rRegN dst, memory mem)
 7278 %{
 7279   predicate(!UseCompactObjectHeaders);
 7280   match(Set dst (LoadNKlass mem));
 7281 
 7282   ins_cost(125); // XXX
 7283   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7284   ins_encode %{
 7285     __ movl($dst$$Register, $mem$$Address);
 7286   %}
 7287   ins_pipe(ialu_reg_mem); // XXX
 7288 %}
 7289 
 7290 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7291 %{
 7292   predicate(UseCompactObjectHeaders);
 7293   match(Set dst (LoadNKlass mem));
 7294   effect(KILL cr);
 7295   ins_cost(125);
 7296   format %{
 7297     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7298     "shrl    $dst, markWord::klass_shift_at_offset"
 7299   %}
 7300   ins_encode %{
 7301     if (UseAPX) {
 7302       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7303     }
 7304     else {
 7305       __ movl($dst$$Register, $mem$$Address);
 7306       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7307     }
 7308   %}
 7309   ins_pipe(ialu_reg_mem);
 7310 %}
 7311 
 7312 // Load Float
 7313 instruct loadF(regF dst, memory mem)
 7314 %{
 7315   match(Set dst (LoadF mem));
 7316 
 7317   ins_cost(145); // XXX
 7318   format %{ "movss   $dst, $mem\t# float" %}
 7319   ins_encode %{
 7320     __ movflt($dst$$XMMRegister, $mem$$Address);
 7321   %}
 7322   ins_pipe(pipe_slow); // XXX
 7323 %}
 7324 
 7325 // Load Double
 7326 instruct loadD_partial(regD dst, memory mem)
 7327 %{
 7328   predicate(!UseXmmLoadAndClearUpper);
 7329   match(Set dst (LoadD mem));
 7330 
 7331   ins_cost(145); // XXX
 7332   format %{ "movlpd  $dst, $mem\t# double" %}
 7333   ins_encode %{
 7334     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7335   %}
 7336   ins_pipe(pipe_slow); // XXX
 7337 %}
 7338 
 7339 instruct loadD(regD dst, memory mem)
 7340 %{
 7341   predicate(UseXmmLoadAndClearUpper);
 7342   match(Set dst (LoadD mem));
 7343 
 7344   ins_cost(145); // XXX
 7345   format %{ "movsd   $dst, $mem\t# double" %}
 7346   ins_encode %{
 7347     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7348   %}
 7349   ins_pipe(pipe_slow); // XXX
 7350 %}
 7351 
 7352 // max = java.lang.Math.max(float a, float b)
 7353 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7354   predicate(VM_Version::supports_avx10_2());
 7355   match(Set dst (MaxF a b));
 7356   format %{ "maxF $dst, $a, $b" %}
 7357   ins_encode %{
 7358     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7359   %}
 7360   ins_pipe( pipe_slow );
 7361 %}
 7362 
 7363 // max = java.lang.Math.max(float a, float b)
 7364 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7365   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7366   match(Set dst (MaxF a b));
 7367   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7368   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7369   ins_encode %{
 7370     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7371   %}
 7372   ins_pipe( pipe_slow );
 7373 %}
 7374 
 7375 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7376   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7377   match(Set dst (MaxF a b));
 7378   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7379 
 7380   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7381   ins_encode %{
 7382     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7383                     false /*min*/, true /*single*/);
 7384   %}
 7385   ins_pipe( pipe_slow );
 7386 %}
 7387 
 7388 // max = java.lang.Math.max(double a, double b)
 7389 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7390   predicate(VM_Version::supports_avx10_2());
 7391   match(Set dst (MaxD a b));
 7392   format %{ "maxD $dst, $a, $b" %}
 7393   ins_encode %{
 7394     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7395   %}
 7396   ins_pipe( pipe_slow );
 7397 %}
 7398 
 7399 // max = java.lang.Math.max(double a, double b)
 7400 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7401   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7402   match(Set dst (MaxD a b));
 7403   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7404   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7405   ins_encode %{
 7406     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7407   %}
 7408   ins_pipe( pipe_slow );
 7409 %}
 7410 
 7411 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7412   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7413   match(Set dst (MaxD a b));
 7414   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7415 
 7416   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7417   ins_encode %{
 7418     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7419                     false /*min*/, false /*single*/);
 7420   %}
 7421   ins_pipe( pipe_slow );
 7422 %}
 7423 
// min = java.lang.Math.min(float a, float b)
 7425 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7426   predicate(VM_Version::supports_avx10_2());
 7427   match(Set dst (MinF a b));
 7428   format %{ "minF $dst, $a, $b" %}
 7429   ins_encode %{
 7430     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7431   %}
 7432   ins_pipe( pipe_slow );
 7433 %}
 7434 
 7435 // min = java.lang.Math.min(float a, float b)
 7436 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7437   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7438   match(Set dst (MinF a b));
 7439   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7440   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7441   ins_encode %{
 7442     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7443   %}
 7444   ins_pipe( pipe_slow );
 7445 %}
 7446 
 7447 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7448   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7449   match(Set dst (MinF a b));
 7450   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7451 
 7452   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7453   ins_encode %{
 7454     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7455                     true /*min*/, true /*single*/);
 7456   %}
 7457   ins_pipe( pipe_slow );
 7458 %}
 7459 
// min = java.lang.Math.min(double a, double b)
 7461 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7462   predicate(VM_Version::supports_avx10_2());
 7463   match(Set dst (MinD a b));
 7464   format %{ "minD $dst, $a, $b" %}
 7465   ins_encode %{
 7466     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7467   %}
 7468   ins_pipe( pipe_slow );
 7469 %}
 7470 
 7471 // min = java.lang.Math.min(double a, double b)
 7472 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7473   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7474   match(Set dst (MinD a b));
 7475   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7476     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7477   ins_encode %{
 7478     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7479   %}
 7480   ins_pipe( pipe_slow );
 7481 %}
 7482 
 7483 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7484   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7485   match(Set dst (MinD a b));
 7486   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7487 
 7488   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7489   ins_encode %{
 7490     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7491                     true /*min*/, false /*single*/);
 7492   %}
 7493   ins_pipe( pipe_slow );
 7494 %}
 7495 
 7496 // Load Effective Address
 7497 instruct leaP8(rRegP dst, indOffset8 mem)
 7498 %{
 7499   match(Set dst mem);
 7500 
 7501   ins_cost(110); // XXX
 7502   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7503   ins_encode %{
 7504     __ leaq($dst$$Register, $mem$$Address);
 7505   %}
 7506   ins_pipe(ialu_reg_reg_fat);
 7507 %}
 7508 
 7509 instruct leaP32(rRegP dst, indOffset32 mem)
 7510 %{
 7511   match(Set dst mem);
 7512 
 7513   ins_cost(110);
 7514   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7515   ins_encode %{
 7516     __ leaq($dst$$Register, $mem$$Address);
 7517   %}
 7518   ins_pipe(ialu_reg_reg_fat);
 7519 %}
 7520 
 7521 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7522 %{
 7523   match(Set dst mem);
 7524 
 7525   ins_cost(110);
 7526   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7527   ins_encode %{
 7528     __ leaq($dst$$Register, $mem$$Address);
 7529   %}
 7530   ins_pipe(ialu_reg_reg_fat);
 7531 %}
 7532 
 7533 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7534 %{
 7535   match(Set dst mem);
 7536 
 7537   ins_cost(110);
 7538   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7539   ins_encode %{
 7540     __ leaq($dst$$Register, $mem$$Address);
 7541   %}
 7542   ins_pipe(ialu_reg_reg_fat);
 7543 %}
 7544 
 7545 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7546 %{
 7547   match(Set dst mem);
 7548 
 7549   ins_cost(110);
 7550   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7551   ins_encode %{
 7552     __ leaq($dst$$Register, $mem$$Address);
 7553   %}
 7554   ins_pipe(ialu_reg_reg_fat);
 7555 %}
 7556 
 7557 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7558 %{
 7559   match(Set dst mem);
 7560 
 7561   ins_cost(110);
 7562   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7563   ins_encode %{
 7564     __ leaq($dst$$Register, $mem$$Address);
 7565   %}
 7566   ins_pipe(ialu_reg_reg_fat);
 7567 %}
 7568 
 7569 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7570 %{
 7571   match(Set dst mem);
 7572 
 7573   ins_cost(110);
 7574   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7575   ins_encode %{
 7576     __ leaq($dst$$Register, $mem$$Address);
 7577   %}
 7578   ins_pipe(ialu_reg_reg_fat);
 7579 %}
 7580 
 7581 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7582 %{
 7583   match(Set dst mem);
 7584 
 7585   ins_cost(110);
 7586   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7587   ins_encode %{
 7588     __ leaq($dst$$Register, $mem$$Address);
 7589   %}
 7590   ins_pipe(ialu_reg_reg_fat);
 7591 %}
 7592 
// Load Effective Address which uses a narrow (32-bit) oop
 7594 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7595 %{
 7596   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7597   match(Set dst mem);
 7598 
 7599   ins_cost(110);
 7600   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7601   ins_encode %{
 7602     __ leaq($dst$$Register, $mem$$Address);
 7603   %}
 7604   ins_pipe(ialu_reg_reg_fat);
 7605 %}
 7606 
 7607 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7608 %{
 7609   predicate(CompressedOops::shift() == 0);
 7610   match(Set dst mem);
 7611 
 7612   ins_cost(110); // XXX
 7613   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7614   ins_encode %{
 7615     __ leaq($dst$$Register, $mem$$Address);
 7616   %}
 7617   ins_pipe(ialu_reg_reg_fat);
 7618 %}
 7619 
 7620 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7621 %{
 7622   predicate(CompressedOops::shift() == 0);
 7623   match(Set dst mem);
 7624 
 7625   ins_cost(110);
 7626   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7627   ins_encode %{
 7628     __ leaq($dst$$Register, $mem$$Address);
 7629   %}
 7630   ins_pipe(ialu_reg_reg_fat);
 7631 %}
 7632 
 7633 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7634 %{
 7635   predicate(CompressedOops::shift() == 0);
 7636   match(Set dst mem);
 7637 
 7638   ins_cost(110);
 7639   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7640   ins_encode %{
 7641     __ leaq($dst$$Register, $mem$$Address);
 7642   %}
 7643   ins_pipe(ialu_reg_reg_fat);
 7644 %}
 7645 
 7646 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7647 %{
 7648   predicate(CompressedOops::shift() == 0);
 7649   match(Set dst mem);
 7650 
 7651   ins_cost(110);
 7652   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7653   ins_encode %{
 7654     __ leaq($dst$$Register, $mem$$Address);
 7655   %}
 7656   ins_pipe(ialu_reg_reg_fat);
 7657 %}
 7658 
 7659 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7660 %{
 7661   predicate(CompressedOops::shift() == 0);
 7662   match(Set dst mem);
 7663 
 7664   ins_cost(110);
 7665   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7666   ins_encode %{
 7667     __ leaq($dst$$Register, $mem$$Address);
 7668   %}
 7669   ins_pipe(ialu_reg_reg_fat);
 7670 %}
 7671 
 7672 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7673 %{
 7674   predicate(CompressedOops::shift() == 0);
 7675   match(Set dst mem);
 7676 
 7677   ins_cost(110);
 7678   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7679   ins_encode %{
 7680     __ leaq($dst$$Register, $mem$$Address);
 7681   %}
 7682   ins_pipe(ialu_reg_reg_fat);
 7683 %}
 7684 
 7685 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7686 %{
 7687   predicate(CompressedOops::shift() == 0);
 7688   match(Set dst mem);
 7689 
 7690   ins_cost(110);
 7691   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7692   ins_encode %{
 7693     __ leaq($dst$$Register, $mem$$Address);
 7694   %}
 7695   ins_pipe(ialu_reg_reg_fat);
 7696 %}
 7697 
 7698 instruct loadConI(rRegI dst, immI src)
 7699 %{
 7700   match(Set dst src);
 7701 
 7702   format %{ "movl    $dst, $src\t# int" %}
 7703   ins_encode %{
 7704     __ movl($dst$$Register, $src$$constant);
 7705   %}
 7706   ins_pipe(ialu_reg_fat); // XXX
 7707 %}
 7708 
 7709 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7710 %{
 7711   match(Set dst src);
 7712   effect(KILL cr);
 7713 
 7714   ins_cost(50);
 7715   format %{ "xorl    $dst, $dst\t# int" %}
 7716   ins_encode %{
 7717     __ xorl($dst$$Register, $dst$$Register);
 7718   %}
 7719   ins_pipe(ialu_reg);
 7720 %}
 7721 
 7722 instruct loadConL(rRegL dst, immL src)
 7723 %{
 7724   match(Set dst src);
 7725 
 7726   ins_cost(150);
 7727   format %{ "movq    $dst, $src\t# long" %}
 7728   ins_encode %{
 7729     __ mov64($dst$$Register, $src$$constant);
 7730   %}
 7731   ins_pipe(ialu_reg);
 7732 %}
 7733 
 7734 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7735 %{
 7736   match(Set dst src);
 7737   effect(KILL cr);
 7738 
 7739   ins_cost(50);
 7740   format %{ "xorl    $dst, $dst\t# long" %}
 7741   ins_encode %{
 7742     __ xorl($dst$$Register, $dst$$Register);
 7743   %}
 7744   ins_pipe(ialu_reg); // XXX
 7745 %}
 7746 
 7747 instruct loadConUL32(rRegL dst, immUL32 src)
 7748 %{
 7749   match(Set dst src);
 7750 
 7751   ins_cost(60);
 7752   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7753   ins_encode %{
 7754     __ movl($dst$$Register, $src$$constant);
 7755   %}
 7756   ins_pipe(ialu_reg);
 7757 %}
 7758 
 7759 instruct loadConL32(rRegL dst, immL32 src)
 7760 %{
 7761   match(Set dst src);
 7762 
 7763   ins_cost(70);
 7764   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7765   ins_encode %{
 7766     __ movq($dst$$Register, $src$$constant);
 7767   %}
 7768   ins_pipe(ialu_reg);
 7769 %}
 7770 
 7771 instruct loadConP(rRegP dst, immP con) %{
 7772   match(Set dst con);
 7773 
 7774   format %{ "movq    $dst, $con\t# ptr" %}
 7775   ins_encode %{
 7776     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7777   %}
 7778   ins_pipe(ialu_reg_fat); // XXX
 7779 %}
 7780 
 7781 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7782 %{
 7783   match(Set dst src);
 7784   effect(KILL cr);
 7785 
 7786   ins_cost(50);
 7787   format %{ "xorl    $dst, $dst\t# ptr" %}
 7788   ins_encode %{
 7789     __ xorl($dst$$Register, $dst$$Register);
 7790   %}
 7791   ins_pipe(ialu_reg);
 7792 %}
 7793 
 7794 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7795 %{
 7796   match(Set dst src);
 7797   effect(KILL cr);
 7798 
 7799   ins_cost(60);
 7800   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7801   ins_encode %{
 7802     __ movl($dst$$Register, $src$$constant);
 7803   %}
 7804   ins_pipe(ialu_reg);
 7805 %}
 7806 
 7807 instruct loadConF(regF dst, immF con) %{
 7808   match(Set dst con);
 7809   ins_cost(125);
 7810   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7811   ins_encode %{
 7812     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7813   %}
 7814   ins_pipe(pipe_slow);
 7815 %}
 7816 
 7817 instruct loadConH(regF dst, immH con) %{
 7818   match(Set dst con);
 7819   ins_cost(125);
 7820   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7821   ins_encode %{
 7822     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7823   %}
 7824   ins_pipe(pipe_slow);
 7825 %}
 7826 
 7827 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7828   match(Set dst src);
 7829   effect(KILL cr);
 7830   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7831   ins_encode %{
 7832     __ xorq($dst$$Register, $dst$$Register);
 7833   %}
 7834   ins_pipe(ialu_reg);
 7835 %}
 7836 
 7837 instruct loadConN(rRegN dst, immN src) %{
 7838   match(Set dst src);
 7839 
 7840   ins_cost(125);
 7841   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7842   ins_encode %{
 7843     address con = (address)$src$$constant;
 7844     if (con == nullptr) {
 7845       ShouldNotReachHere();
 7846     } else {
 7847       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7848     }
 7849   %}
 7850   ins_pipe(ialu_reg_fat); // XXX
 7851 %}
 7852 
 7853 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7854   match(Set dst src);
 7855 
 7856   ins_cost(125);
 7857   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7858   ins_encode %{
 7859     address con = (address)$src$$constant;
 7860     if (con == nullptr) {
 7861       ShouldNotReachHere();
 7862     } else {
 7863       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7864     }
 7865   %}
 7866   ins_pipe(ialu_reg_fat); // XXX
 7867 %}
 7868 
 7869 instruct loadConF0(regF dst, immF0 src)
 7870 %{
 7871   match(Set dst src);
 7872   ins_cost(100);
 7873 
 7874   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7875   ins_encode %{
 7876     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7877   %}
 7878   ins_pipe(pipe_slow);
 7879 %}
 7880 
// Use the same format since predicate() cannot be used here.
 7882 instruct loadConD(regD dst, immD con) %{
 7883   match(Set dst con);
 7884   ins_cost(125);
 7885   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7886   ins_encode %{
 7887     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7888   %}
 7889   ins_pipe(pipe_slow);
 7890 %}
 7891 
 7892 instruct loadConD0(regD dst, immD0 src)
 7893 %{
 7894   match(Set dst src);
 7895   ins_cost(100);
 7896 
 7897   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7898   ins_encode %{
 7899     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7900   %}
 7901   ins_pipe(pipe_slow);
 7902 %}
 7903 
 7904 instruct loadSSI(rRegI dst, stackSlotI src)
 7905 %{
 7906   match(Set dst src);
 7907 
 7908   ins_cost(125);
 7909   format %{ "movl    $dst, $src\t# int stk" %}
 7910   ins_encode %{
 7911     __ movl($dst$$Register, $src$$Address);
 7912   %}
 7913   ins_pipe(ialu_reg_mem);
 7914 %}
 7915 
 7916 instruct loadSSL(rRegL dst, stackSlotL src)
 7917 %{
 7918   match(Set dst src);
 7919 
 7920   ins_cost(125);
 7921   format %{ "movq    $dst, $src\t# long stk" %}
 7922   ins_encode %{
 7923     __ movq($dst$$Register, $src$$Address);
 7924   %}
 7925   ins_pipe(ialu_reg_mem);
 7926 %}
 7927 
 7928 instruct loadSSP(rRegP dst, stackSlotP src)
 7929 %{
 7930   match(Set dst src);
 7931 
 7932   ins_cost(125);
 7933   format %{ "movq    $dst, $src\t# ptr stk" %}
 7934   ins_encode %{
 7935     __ movq($dst$$Register, $src$$Address);
 7936   %}
 7937   ins_pipe(ialu_reg_mem);
 7938 %}
 7939 
 7940 instruct loadSSF(regF dst, stackSlotF src)
 7941 %{
 7942   match(Set dst src);
 7943 
 7944   ins_cost(125);
 7945   format %{ "movss   $dst, $src\t# float stk" %}
 7946   ins_encode %{
 7947     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7948   %}
 7949   ins_pipe(pipe_slow); // XXX
 7950 %}
 7951 
// Use the same format since predicate() cannot be used here.
 7953 instruct loadSSD(regD dst, stackSlotD src)
 7954 %{
 7955   match(Set dst src);
 7956 
 7957   ins_cost(125);
 7958   format %{ "movsd   $dst, $src\t# double stk" %}
 7959   ins_encode  %{
 7960     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7961   %}
 7962   ins_pipe(pipe_slow); // XXX
 7963 %}
 7964 
 7965 // Prefetch instructions for allocation.
 7966 // Must be safe to execute with invalid address (cannot fault).
 7967 
 7968 instruct prefetchAlloc( memory mem ) %{
 7969   predicate(AllocatePrefetchInstr==3);
 7970   match(PrefetchAllocation mem);
 7971   ins_cost(125);
 7972 
 7973   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7974   ins_encode %{
 7975     __ prefetchw($mem$$Address);
 7976   %}
 7977   ins_pipe(ialu_mem);
 7978 %}
 7979 
 7980 instruct prefetchAllocNTA( memory mem ) %{
 7981   predicate(AllocatePrefetchInstr==0);
 7982   match(PrefetchAllocation mem);
 7983   ins_cost(125);
 7984 
 7985   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7986   ins_encode %{
 7987     __ prefetchnta($mem$$Address);
 7988   %}
 7989   ins_pipe(ialu_mem);
 7990 %}
 7991 
 7992 instruct prefetchAllocT0( memory mem ) %{
 7993   predicate(AllocatePrefetchInstr==1);
 7994   match(PrefetchAllocation mem);
 7995   ins_cost(125);
 7996 
 7997   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7998   ins_encode %{
 7999     __ prefetcht0($mem$$Address);
 8000   %}
 8001   ins_pipe(ialu_mem);
 8002 %}
 8003 
 8004 instruct prefetchAllocT2( memory mem ) %{
 8005   predicate(AllocatePrefetchInstr==2);
 8006   match(PrefetchAllocation mem);
 8007   ins_cost(125);
 8008 
 8009   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8010   ins_encode %{
 8011     __ prefetcht2($mem$$Address);
 8012   %}
 8013   ins_pipe(ialu_mem);
 8014 %}
 8015 
 8016 //----------Store Instructions-------------------------------------------------
 8017 
 8018 // Store Byte
 8019 instruct storeB(memory mem, rRegI src)
 8020 %{
 8021   match(Set mem (StoreB mem src));
 8022 
 8023   ins_cost(125); // XXX
 8024   format %{ "movb    $mem, $src\t# byte" %}
 8025   ins_encode %{
 8026     __ movb($mem$$Address, $src$$Register);
 8027   %}
 8028   ins_pipe(ialu_mem_reg);
 8029 %}
 8030 
 8031 // Store Char/Short
 8032 instruct storeC(memory mem, rRegI src)
 8033 %{
 8034   match(Set mem (StoreC mem src));
 8035 
 8036   ins_cost(125); // XXX
 8037   format %{ "movw    $mem, $src\t# char/short" %}
 8038   ins_encode %{
 8039     __ movw($mem$$Address, $src$$Register);
 8040   %}
 8041   ins_pipe(ialu_mem_reg);
 8042 %}
 8043 
 8044 // Store Integer
 8045 instruct storeI(memory mem, rRegI src)
 8046 %{
 8047   match(Set mem (StoreI mem src));
 8048 
 8049   ins_cost(125); // XXX
 8050   format %{ "movl    $mem, $src\t# int" %}
 8051   ins_encode %{
 8052     __ movl($mem$$Address, $src$$Register);
 8053   %}
 8054   ins_pipe(ialu_mem_reg);
 8055 %}
 8056 
 8057 // Store Long
 8058 instruct storeL(memory mem, rRegL src)
 8059 %{
 8060   match(Set mem (StoreL mem src));
 8061 
 8062   ins_cost(125); // XXX
 8063   format %{ "movq    $mem, $src\t# long" %}
 8064   ins_encode %{
 8065     __ movq($mem$$Address, $src$$Register);
 8066   %}
 8067   ins_pipe(ialu_mem_reg); // XXX
 8068 %}
 8069 
 8070 // Store Pointer
 8071 instruct storeP(memory mem, any_RegP src)
 8072 %{
 8073   predicate(n->as_Store()->barrier_data() == 0);
 8074   match(Set mem (StoreP mem src));
 8075 
 8076   ins_cost(125); // XXX
 8077   format %{ "movq    $mem, $src\t# ptr" %}
 8078   ins_encode %{
 8079     __ movq($mem$$Address, $src$$Register);
 8080   %}
 8081   ins_pipe(ialu_mem_reg);
 8082 %}
 8083 
 8084 instruct storeImmP0(memory mem, immP0 zero)
 8085 %{
 8086   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8087   match(Set mem (StoreP mem zero));
 8088 
 8089   ins_cost(125); // XXX
 8090   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8091   ins_encode %{
 8092     __ movq($mem$$Address, r12);
 8093   %}
 8094   ins_pipe(ialu_mem_reg);
 8095 %}
 8096 
 8097 // Store Null Pointer, mark word, or other simple pointer constant.
 8098 instruct storeImmP(memory mem, immP31 src)
 8099 %{
 8100   predicate(n->as_Store()->barrier_data() == 0);
 8101   match(Set mem (StoreP mem src));
 8102 
 8103   ins_cost(150); // XXX
 8104   format %{ "movq    $mem, $src\t# ptr" %}
 8105   ins_encode %{
 8106     __ movq($mem$$Address, $src$$constant);
 8107   %}
 8108   ins_pipe(ialu_mem_imm);
 8109 %}
 8110 
 8111 // Store Compressed Pointer
 8112 instruct storeN(memory mem, rRegN src)
 8113 %{
 8114   predicate(n->as_Store()->barrier_data() == 0);
 8115   match(Set mem (StoreN mem src));
 8116 
 8117   ins_cost(125); // XXX
 8118   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8119   ins_encode %{
 8120     __ movl($mem$$Address, $src$$Register);
 8121   %}
 8122   ins_pipe(ialu_mem_reg);
 8123 %}
 8124 
 8125 instruct storeNKlass(memory mem, rRegN src)
 8126 %{
 8127   match(Set mem (StoreNKlass mem src));
 8128 
 8129   ins_cost(125); // XXX
 8130   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8131   ins_encode %{
 8132     __ movl($mem$$Address, $src$$Register);
 8133   %}
 8134   ins_pipe(ialu_mem_reg);
 8135 %}
 8136 
 8137 instruct storeImmN0(memory mem, immN0 zero)
 8138 %{
 8139   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8140   match(Set mem (StoreN mem zero));
 8141 
 8142   ins_cost(125); // XXX
 8143   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8144   ins_encode %{
 8145     __ movl($mem$$Address, r12);
 8146   %}
 8147   ins_pipe(ialu_mem_reg);
 8148 %}
 8149 
 8150 instruct storeImmN(memory mem, immN src)
 8151 %{
 8152   predicate(n->as_Store()->barrier_data() == 0);
 8153   match(Set mem (StoreN mem src));
 8154 
 8155   ins_cost(150); // XXX
 8156   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8157   ins_encode %{
 8158     address con = (address)$src$$constant;
 8159     if (con == nullptr) {
 8160       __ movl($mem$$Address, 0);
 8161     } else {
 8162       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8163     }
 8164   %}
 8165   ins_pipe(ialu_mem_imm);
 8166 %}
 8167 
 8168 instruct storeImmNKlass(memory mem, immNKlass src)
 8169 %{
 8170   match(Set mem (StoreNKlass mem src));
 8171 
 8172   ins_cost(150); // XXX
 8173   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8174   ins_encode %{
 8175     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8176   %}
 8177   ins_pipe(ialu_mem_imm);
 8178 %}
 8179 
 8180 // Store Integer Immediate
 8181 instruct storeImmI0(memory mem, immI_0 zero)
 8182 %{
 8183   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8184   match(Set mem (StoreI mem zero));
 8185 
 8186   ins_cost(125); // XXX
 8187   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8188   ins_encode %{
 8189     __ movl($mem$$Address, r12);
 8190   %}
 8191   ins_pipe(ialu_mem_reg);
 8192 %}
 8193 
 8194 instruct storeImmI(memory mem, immI src)
 8195 %{
 8196   match(Set mem (StoreI mem src));
 8197 
 8198   ins_cost(150);
 8199   format %{ "movl    $mem, $src\t# int" %}
 8200   ins_encode %{
 8201     __ movl($mem$$Address, $src$$constant);
 8202   %}
 8203   ins_pipe(ialu_mem_imm);
 8204 %}
 8205 
 8206 // Store Long Immediate
 8207 instruct storeImmL0(memory mem, immL0 zero)
 8208 %{
 8209   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8210   match(Set mem (StoreL mem zero));
 8211 
 8212   ins_cost(125); // XXX
 8213   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8214   ins_encode %{
 8215     __ movq($mem$$Address, r12);
 8216   %}
 8217   ins_pipe(ialu_mem_reg);
 8218 %}
 8219 
 8220 instruct storeImmL(memory mem, immL32 src)
 8221 %{
 8222   match(Set mem (StoreL mem src));
 8223 
 8224   ins_cost(150);
 8225   format %{ "movq    $mem, $src\t# long" %}
 8226   ins_encode %{
 8227     __ movq($mem$$Address, $src$$constant);
 8228   %}
 8229   ins_pipe(ialu_mem_imm);
 8230 %}
 8231 
 8232 // Store Short/Char Immediate
 8233 instruct storeImmC0(memory mem, immI_0 zero)
 8234 %{
 8235   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8236   match(Set mem (StoreC mem zero));
 8237 
 8238   ins_cost(125); // XXX
 8239   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8240   ins_encode %{
 8241     __ movw($mem$$Address, r12);
 8242   %}
 8243   ins_pipe(ialu_mem_reg);
 8244 %}
 8245 
 8246 instruct storeImmI16(memory mem, immI16 src)
 8247 %{
 8248   predicate(UseStoreImmI16);
 8249   match(Set mem (StoreC mem src));
 8250 
 8251   ins_cost(150);
 8252   format %{ "movw    $mem, $src\t# short/char" %}
 8253   ins_encode %{
 8254     __ movw($mem$$Address, $src$$constant);
 8255   %}
 8256   ins_pipe(ialu_mem_imm);
 8257 %}
 8258 
 8259 // Store Byte Immediate
 8260 instruct storeImmB0(memory mem, immI_0 zero)
 8261 %{
 8262   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8263   match(Set mem (StoreB mem zero));
 8264 
 8265   ins_cost(125); // XXX
 8266   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8267   ins_encode %{
 8268     __ movb($mem$$Address, r12);
 8269   %}
 8270   ins_pipe(ialu_mem_reg);
 8271 %}
 8272 
 8273 instruct storeImmB(memory mem, immI8 src)
 8274 %{
 8275   match(Set mem (StoreB mem src));
 8276 
 8277   ins_cost(150); // XXX
 8278   format %{ "movb    $mem, $src\t# byte" %}
 8279   ins_encode %{
 8280     __ movb($mem$$Address, $src$$constant);
 8281   %}
 8282   ins_pipe(ialu_mem_imm);
 8283 %}
 8284 
 8285 // Store Float
 8286 instruct storeF(memory mem, regF src)
 8287 %{
 8288   match(Set mem (StoreF mem src));
 8289 
 8290   ins_cost(95); // XXX
 8291   format %{ "movss   $mem, $src\t# float" %}
 8292   ins_encode %{
 8293     __ movflt($mem$$Address, $src$$XMMRegister);
 8294   %}
 8295   ins_pipe(pipe_slow); // XXX
 8296 %}
 8297 
// Store immediate float value (faster than a store from an XMM register)
 8299 instruct storeF0(memory mem, immF0 zero)
 8300 %{
 8301   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8302   match(Set mem (StoreF mem zero));
 8303 
 8304   ins_cost(25); // XXX
 8305   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8306   ins_encode %{
 8307     __ movl($mem$$Address, r12);
 8308   %}
 8309   ins_pipe(ialu_mem_reg);
 8310 %}
 8311 
 8312 instruct storeF_imm(memory mem, immF src)
 8313 %{
 8314   match(Set mem (StoreF mem src));
 8315 
 8316   ins_cost(50);
 8317   format %{ "movl    $mem, $src\t# float" %}
 8318   ins_encode %{
 8319     __ movl($mem$$Address, jint_cast($src$$constant));
 8320   %}
 8321   ins_pipe(ialu_mem_imm);
 8322 %}
 8323 
 8324 // Store Double
 8325 instruct storeD(memory mem, regD src)
 8326 %{
 8327   match(Set mem (StoreD mem src));
 8328 
 8329   ins_cost(95); // XXX
 8330   format %{ "movsd   $mem, $src\t# double" %}
 8331   ins_encode %{
 8332     __ movdbl($mem$$Address, $src$$XMMRegister);
 8333   %}
 8334   ins_pipe(pipe_slow); // XXX
 8335 %}
 8336 
// Store immediate double 0.0 (faster than a store from an XMM register)
 8338 instruct storeD0_imm(memory mem, immD0 src)
 8339 %{
 8340   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8341   match(Set mem (StoreD mem src));
 8342 
 8343   ins_cost(50);
 8344   format %{ "movq    $mem, $src\t# double 0." %}
 8345   ins_encode %{
 8346     __ movq($mem$$Address, $src$$constant);
 8347   %}
 8348   ins_pipe(ialu_mem_imm);
 8349 %}
 8350 
 8351 instruct storeD0(memory mem, immD0 zero)
 8352 %{
 8353   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8354   match(Set mem (StoreD mem zero));
 8355 
 8356   ins_cost(25); // XXX
 8357   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8358   ins_encode %{
 8359     __ movq($mem$$Address, r12);
 8360   %}
 8361   ins_pipe(ialu_mem_reg);
 8362 %}
 8363 
 8364 instruct storeSSI(stackSlotI dst, rRegI src)
 8365 %{
 8366   match(Set dst src);
 8367 
 8368   ins_cost(100);
 8369   format %{ "movl    $dst, $src\t# int stk" %}
 8370   ins_encode %{
 8371     __ movl($dst$$Address, $src$$Register);
 8372   %}
 8373   ins_pipe( ialu_mem_reg );
 8374 %}
 8375 
 8376 instruct storeSSL(stackSlotL dst, rRegL src)
 8377 %{
 8378   match(Set dst src);
 8379 
 8380   ins_cost(100);
 8381   format %{ "movq    $dst, $src\t# long stk" %}
 8382   ins_encode %{
 8383     __ movq($dst$$Address, $src$$Register);
 8384   %}
 8385   ins_pipe(ialu_mem_reg);
 8386 %}
 8387 
 8388 instruct storeSSP(stackSlotP dst, rRegP src)
 8389 %{
 8390   match(Set dst src);
 8391 
 8392   ins_cost(100);
 8393   format %{ "movq    $dst, $src\t# ptr stk" %}
 8394   ins_encode %{
 8395     __ movq($dst$$Address, $src$$Register);
 8396   %}
 8397   ins_pipe(ialu_mem_reg);
 8398 %}
 8399 
 8400 instruct storeSSF(stackSlotF dst, regF src)
 8401 %{
 8402   match(Set dst src);
 8403 
 8404   ins_cost(95); // XXX
 8405   format %{ "movss   $dst, $src\t# float stk" %}
 8406   ins_encode %{
 8407     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8408   %}
 8409   ins_pipe(pipe_slow); // XXX
 8410 %}
 8411 
 8412 instruct storeSSD(stackSlotD dst, regD src)
 8413 %{
 8414   match(Set dst src);
 8415 
 8416   ins_cost(95); // XXX
 8417   format %{ "movsd   $dst, $src\t# double stk" %}
 8418   ins_encode %{
 8419     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8420   %}
 8421   ins_pipe(pipe_slow); // XXX
 8422 %}
 8423 
 8424 instruct cacheWB(indirect addr)
 8425 %{
 8426   predicate(VM_Version::supports_data_cache_line_flush());
 8427   match(CacheWB addr);
 8428 
 8429   ins_cost(100);
 8430   format %{"cache wb $addr" %}
 8431   ins_encode %{
 8432     assert($addr->index_position() < 0, "should be");
 8433     assert($addr$$disp == 0, "should be");
 8434     __ cache_wb(Address($addr$$base$$Register, 0));
 8435   %}
 8436   ins_pipe(pipe_slow); // XXX
 8437 %}
 8438 
 8439 instruct cacheWBPreSync()
 8440 %{
 8441   predicate(VM_Version::supports_data_cache_line_flush());
 8442   match(CacheWBPreSync);
 8443 
 8444   ins_cost(100);
 8445   format %{"cache wb presync" %}
 8446   ins_encode %{
 8447     __ cache_wbsync(true);
 8448   %}
 8449   ins_pipe(pipe_slow); // XXX
 8450 %}
 8451 
 8452 instruct cacheWBPostSync()
 8453 %{
 8454   predicate(VM_Version::supports_data_cache_line_flush());
 8455   match(CacheWBPostSync);
 8456 
 8457   ins_cost(100);
 8458   format %{"cache wb postsync" %}
 8459   ins_encode %{
 8460     __ cache_wbsync(false);
 8461   %}
 8462   ins_pipe(pipe_slow); // XXX
 8463 %}
 8464 
 8465 //----------BSWAP Instructions-------------------------------------------------
 8466 instruct bytes_reverse_int(rRegI dst) %{
 8467   match(Set dst (ReverseBytesI dst));
 8468 
 8469   format %{ "bswapl  $dst" %}
 8470   ins_encode %{
 8471     __ bswapl($dst$$Register);
 8472   %}
 8473   ins_pipe( ialu_reg );
 8474 %}
 8475 
 8476 instruct bytes_reverse_long(rRegL dst) %{
 8477   match(Set dst (ReverseBytesL dst));
 8478 
 8479   format %{ "bswapq  $dst" %}
 8480   ins_encode %{
 8481     __ bswapq($dst$$Register);
 8482   %}
  ins_pipe(ialu_reg);
 8484 %}
 8485 
 8486 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8487   match(Set dst (ReverseBytesUS dst));
 8488   effect(KILL cr);
 8489 
 8490   format %{ "bswapl  $dst\n\t"
 8491             "shrl    $dst,16\n\t" %}
 8492   ins_encode %{
 8493     __ bswapl($dst$$Register);
 8494     __ shrl($dst$$Register, 16);
 8495   %}
 8496   ins_pipe( ialu_reg );
 8497 %}
 8498 
 8499 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8500   match(Set dst (ReverseBytesS dst));
 8501   effect(KILL cr);
 8502 
 8503   format %{ "bswapl  $dst\n\t"
 8504             "sar     $dst,16\n\t" %}
 8505   ins_encode %{
 8506     __ bswapl($dst$$Register);
 8507     __ sarl($dst$$Register, 16);
 8508   %}
 8509   ins_pipe( ialu_reg );
 8510 %}
 8511 
 8512 //---------- Zeros Count Instructions ------------------------------------------
 8513 
 8514 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8515   predicate(UseCountLeadingZerosInstruction);
 8516   match(Set dst (CountLeadingZerosI src));
 8517   effect(KILL cr);
 8518 
 8519   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8520   ins_encode %{
 8521     __ lzcntl($dst$$Register, $src$$Register);
 8522   %}
 8523   ins_pipe(ialu_reg);
 8524 %}
 8525 
 8526 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8527   predicate(UseCountLeadingZerosInstruction);
 8528   match(Set dst (CountLeadingZerosI (LoadI src)));
 8529   effect(KILL cr);
 8530   ins_cost(175);
 8531   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8532   ins_encode %{
 8533     __ lzcntl($dst$$Register, $src$$Address);
 8534   %}
 8535   ins_pipe(ialu_reg_mem);
 8536 %}
 8537 
 8538 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8539   predicate(!UseCountLeadingZerosInstruction);
 8540   match(Set dst (CountLeadingZerosI src));
 8541   effect(KILL cr);
 8542 
 8543   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8544             "jnz     skip\n\t"
 8545             "movl    $dst, -1\n"
 8546       "skip:\n\t"
 8547             "negl    $dst\n\t"
 8548             "addl    $dst, 31" %}
 8549   ins_encode %{
 8550     Register Rdst = $dst$$Register;
 8551     Register Rsrc = $src$$Register;
 8552     Label skip;
 8553     __ bsrl(Rdst, Rsrc);
 8554     __ jccb(Assembler::notZero, skip);
 8555     __ movl(Rdst, -1);
 8556     __ bind(skip);
 8557     __ negl(Rdst);
 8558     __ addl(Rdst, BitsPerInt - 1);
 8559   %}
 8560   ins_pipe(ialu_reg);
 8561 %}
 8562 
 8563 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8564   predicate(UseCountLeadingZerosInstruction);
 8565   match(Set dst (CountLeadingZerosL src));
 8566   effect(KILL cr);
 8567 
 8568   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8569   ins_encode %{
 8570     __ lzcntq($dst$$Register, $src$$Register);
 8571   %}
 8572   ins_pipe(ialu_reg);
 8573 %}
 8574 
 8575 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8576   predicate(UseCountLeadingZerosInstruction);
 8577   match(Set dst (CountLeadingZerosL (LoadL src)));
 8578   effect(KILL cr);
 8579   ins_cost(175);
 8580   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8581   ins_encode %{
 8582     __ lzcntq($dst$$Register, $src$$Address);
 8583   %}
 8584   ins_pipe(ialu_reg_mem);
 8585 %}
 8586 
 8587 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8588   predicate(!UseCountLeadingZerosInstruction);
 8589   match(Set dst (CountLeadingZerosL src));
 8590   effect(KILL cr);
 8591 
 8592   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8593             "jnz     skip\n\t"
 8594             "movl    $dst, -1\n"
 8595       "skip:\n\t"
 8596             "negl    $dst\n\t"
 8597             "addl    $dst, 63" %}
 8598   ins_encode %{
 8599     Register Rdst = $dst$$Register;
 8600     Register Rsrc = $src$$Register;
 8601     Label skip;
 8602     __ bsrq(Rdst, Rsrc);
 8603     __ jccb(Assembler::notZero, skip);
 8604     __ movl(Rdst, -1);
 8605     __ bind(skip);
 8606     __ negl(Rdst);
 8607     __ addl(Rdst, BitsPerLong - 1);
 8608   %}
 8609   ins_pipe(ialu_reg);
 8610 %}
 8611 
 8612 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8613   predicate(UseCountTrailingZerosInstruction);
 8614   match(Set dst (CountTrailingZerosI src));
 8615   effect(KILL cr);
 8616 
 8617   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8618   ins_encode %{
 8619     __ tzcntl($dst$$Register, $src$$Register);
 8620   %}
 8621   ins_pipe(ialu_reg);
 8622 %}
 8623 
 8624 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8625   predicate(UseCountTrailingZerosInstruction);
 8626   match(Set dst (CountTrailingZerosI (LoadI src)));
 8627   effect(KILL cr);
 8628   ins_cost(175);
 8629   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8630   ins_encode %{
 8631     __ tzcntl($dst$$Register, $src$$Address);
 8632   %}
 8633   ins_pipe(ialu_reg_mem);
 8634 %}
 8635 
 8636 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8637   predicate(!UseCountTrailingZerosInstruction);
 8638   match(Set dst (CountTrailingZerosI src));
 8639   effect(KILL cr);
 8640 
 8641   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8642             "jnz     done\n\t"
 8643             "movl    $dst, 32\n"
 8644       "done:" %}
 8645   ins_encode %{
 8646     Register Rdst = $dst$$Register;
 8647     Label done;
 8648     __ bsfl(Rdst, $src$$Register);
 8649     __ jccb(Assembler::notZero, done);
 8650     __ movl(Rdst, BitsPerInt);
 8651     __ bind(done);
 8652   %}
 8653   ins_pipe(ialu_reg);
 8654 %}
 8655 
 8656 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8657   predicate(UseCountTrailingZerosInstruction);
 8658   match(Set dst (CountTrailingZerosL src));
 8659   effect(KILL cr);
 8660 
 8661   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8662   ins_encode %{
 8663     __ tzcntq($dst$$Register, $src$$Register);
 8664   %}
 8665   ins_pipe(ialu_reg);
 8666 %}
 8667 
 8668 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8669   predicate(UseCountTrailingZerosInstruction);
 8670   match(Set dst (CountTrailingZerosL (LoadL src)));
 8671   effect(KILL cr);
 8672   ins_cost(175);
 8673   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8674   ins_encode %{
 8675     __ tzcntq($dst$$Register, $src$$Address);
 8676   %}
 8677   ins_pipe(ialu_reg_mem);
 8678 %}
 8679 
 8680 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8681   predicate(!UseCountTrailingZerosInstruction);
 8682   match(Set dst (CountTrailingZerosL src));
 8683   effect(KILL cr);
 8684 
 8685   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8686             "jnz     done\n\t"
 8687             "movl    $dst, 64\n"
 8688       "done:" %}
 8689   ins_encode %{
 8690     Register Rdst = $dst$$Register;
 8691     Label done;
 8692     __ bsfq(Rdst, $src$$Register);
 8693     __ jccb(Assembler::notZero, done);
 8694     __ movl(Rdst, BitsPerLong);
 8695     __ bind(done);
 8696   %}
 8697   ins_pipe(ialu_reg);
 8698 %}
 8699 
 8700 //--------------- Reverse Operation Instructions ----------------
 8701 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8702   predicate(!VM_Version::supports_gfni());
 8703   match(Set dst (ReverseI src));
 8704   effect(TEMP dst, TEMP rtmp, KILL cr);
 8705   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8706   ins_encode %{
 8707     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8708   %}
 8709   ins_pipe( ialu_reg );
 8710 %}
 8711 
 8712 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8713   predicate(VM_Version::supports_gfni());
 8714   match(Set dst (ReverseI src));
 8715   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8716   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8717   ins_encode %{
 8718     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8719   %}
 8720   ins_pipe( ialu_reg );
 8721 %}
 8722 
 8723 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8724   predicate(!VM_Version::supports_gfni());
 8725   match(Set dst (ReverseL src));
 8726   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8727   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8728   ins_encode %{
 8729     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8730   %}
 8731   ins_pipe( ialu_reg );
 8732 %}
 8733 
 8734 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8735   predicate(VM_Version::supports_gfni());
 8736   match(Set dst (ReverseL src));
 8737   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8738   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8739   ins_encode %{
 8740     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8741   %}
 8742   ins_pipe( ialu_reg );
 8743 %}
 8744 
 8745 //---------- Population Count Instructions -------------------------------------
 8746 
 8747 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8748   predicate(UsePopCountInstruction);
 8749   match(Set dst (PopCountI src));
 8750   effect(KILL cr);
 8751 
 8752   format %{ "popcnt  $dst, $src" %}
 8753   ins_encode %{
 8754     __ popcntl($dst$$Register, $src$$Register);
 8755   %}
 8756   ins_pipe(ialu_reg);
 8757 %}
 8758 
 8759 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8760   predicate(UsePopCountInstruction);
 8761   match(Set dst (PopCountI (LoadI mem)));
 8762   effect(KILL cr);
 8763 
 8764   format %{ "popcnt  $dst, $mem" %}
 8765   ins_encode %{
 8766     __ popcntl($dst$$Register, $mem$$Address);
 8767   %}
 8768   ins_pipe(ialu_reg);
 8769 %}
 8770 
 8771 // Note: Long.bitCount(long) returns an int.
 8772 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8773   predicate(UsePopCountInstruction);
 8774   match(Set dst (PopCountL src));
 8775   effect(KILL cr);
 8776 
 8777   format %{ "popcnt  $dst, $src" %}
 8778   ins_encode %{
 8779     __ popcntq($dst$$Register, $src$$Register);
 8780   %}
 8781   ins_pipe(ialu_reg);
 8782 %}
 8783 
 8784 // Note: Long.bitCount(long) returns an int.
 8785 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8786   predicate(UsePopCountInstruction);
 8787   match(Set dst (PopCountL (LoadL mem)));
 8788   effect(KILL cr);
 8789 
 8790   format %{ "popcnt  $dst, $mem" %}
 8791   ins_encode %{
 8792     __ popcntq($dst$$Register, $mem$$Address);
 8793   %}
 8794   ins_pipe(ialu_reg);
 8795 %}
 8796 
 8797 
 8798 //----------MemBar Instructions-----------------------------------------------
 8799 // Memory barrier flavors
 8800 
 8801 instruct membar_acquire()
 8802 %{
 8803   match(MemBarAcquire);
 8804   match(LoadFence);
 8805   ins_cost(0);
 8806 
 8807   size(0);
 8808   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8809   ins_encode();
 8810   ins_pipe(empty);
 8811 %}
 8812 
 8813 instruct membar_acquire_lock()
 8814 %{
 8815   match(MemBarAcquireLock);
 8816   ins_cost(0);
 8817 
 8818   size(0);
 8819   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8820   ins_encode();
 8821   ins_pipe(empty);
 8822 %}
 8823 
 8824 instruct membar_release()
 8825 %{
 8826   match(MemBarRelease);
 8827   match(StoreFence);
 8828   ins_cost(0);
 8829 
 8830   size(0);
 8831   format %{ "MEMBAR-release ! (empty encoding)" %}
 8832   ins_encode();
 8833   ins_pipe(empty);
 8834 %}
 8835 
 8836 instruct membar_release_lock()
 8837 %{
 8838   match(MemBarReleaseLock);
 8839   ins_cost(0);
 8840 
 8841   size(0);
 8842   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8843   ins_encode();
 8844   ins_pipe(empty);
 8845 %}
 8846 
 8847 instruct membar_volatile(rFlagsReg cr) %{
 8848   match(MemBarVolatile);
 8849   effect(KILL cr);
 8850   ins_cost(400);
 8851 
 8852   format %{
 8853     $$template
 8854     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8855   %}
 8856   ins_encode %{
 8857     __ membar(Assembler::StoreLoad);
 8858   %}
 8859   ins_pipe(pipe_slow);
 8860 %}
 8861 
 8862 instruct unnecessary_membar_volatile()
 8863 %{
 8864   match(MemBarVolatile);
 8865   predicate(Matcher::post_store_load_barrier(n));
 8866   ins_cost(0);
 8867 
 8868   size(0);
 8869   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8870   ins_encode();
 8871   ins_pipe(empty);
 8872 %}
 8873 
 8874 instruct membar_storestore() %{
 8875   match(MemBarStoreStore);
 8876   match(StoreStoreFence);
 8877   ins_cost(0);
 8878 
 8879   size(0);
 8880   format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
 8882   ins_pipe(empty);
 8883 %}
 8884 
 8885 //----------Move Instructions--------------------------------------------------
 8886 
 8887 instruct castX2P(rRegP dst, rRegL src)
 8888 %{
 8889   match(Set dst (CastX2P src));
 8890 
 8891   format %{ "movq    $dst, $src\t# long->ptr" %}
 8892   ins_encode %{
 8893     if ($dst$$reg != $src$$reg) {
 8894       __ movptr($dst$$Register, $src$$Register);
 8895     }
 8896   %}
 8897   ins_pipe(ialu_reg_reg); // XXX
 8898 %}
 8899 
 8900 instruct castI2N(rRegN dst, rRegI src)
 8901 %{
 8902   match(Set dst (CastI2N src));
 8903 
 8904   format %{ "movq    $dst, $src\t# int -> narrow ptr" %}
 8905   ins_encode %{
 8906     if ($dst$$reg != $src$$reg) {
 8907       __ movl($dst$$Register, $src$$Register);
 8908     }
 8909   %}
 8910   ins_pipe(ialu_reg_reg); // XXX
 8911 %}
 8912 
 8913 instruct castN2X(rRegL dst, rRegN src)
 8914 %{
 8915   match(Set dst (CastP2X src));
 8916 
 8917   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8918   ins_encode %{
 8919     if ($dst$$reg != $src$$reg) {
 8920       __ movptr($dst$$Register, $src$$Register);
 8921     }
 8922   %}
 8923   ins_pipe(ialu_reg_reg); // XXX
 8924 %}
 8925 
 8926 instruct castP2X(rRegL dst, rRegP src)
 8927 %{
 8928   match(Set dst (CastP2X src));
 8929 
 8930   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8931   ins_encode %{
 8932     if ($dst$$reg != $src$$reg) {
 8933       __ movptr($dst$$Register, $src$$Register);
 8934     }
 8935   %}
 8936   ins_pipe(ialu_reg_reg); // XXX
 8937 %}
 8938 
// Convert oop into int for vector alignment masking
 8940 instruct convP2I(rRegI dst, rRegP src)
 8941 %{
 8942   match(Set dst (ConvL2I (CastP2X src)));
 8943 
 8944   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8945   ins_encode %{
 8946     __ movl($dst$$Register, $src$$Register);
 8947   %}
 8948   ins_pipe(ialu_reg_reg); // XXX
 8949 %}
 8950 
// Convert compressed oop into int for vector alignment masking
// in case of 32-bit oops (heap < 4GB).
 8953 instruct convN2I(rRegI dst, rRegN src)
 8954 %{
 8955   predicate(CompressedOops::shift() == 0);
 8956   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8957 
 8958   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8959   ins_encode %{
 8960     __ movl($dst$$Register, $src$$Register);
 8961   %}
 8962   ins_pipe(ialu_reg_reg); // XXX
 8963 %}
 8964 
 8965 // Convert oop pointer into compressed form
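// Conceptually, a sketch of the general heap-based mode:
//   narrow = (oop == nullptr) ? 0 : (oop - CompressedOops::base()) >> CompressedOops::shift()
//   oop    = (narrow == 0) ? nullptr : CompressedOops::base() + ((uintptr_t)narrow << CompressedOops::shift())
// encode_heap_oop()/decode_heap_oop() emit cheaper sequences when base() is
// null and/or shift() is 0 (the zero-based and unscaled modes).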
 8966 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8967   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8968   match(Set dst (EncodeP src));
 8969   effect(KILL cr);
 8970   format %{ "encode_heap_oop $dst,$src" %}
 8971   ins_encode %{
 8972     Register s = $src$$Register;
 8973     Register d = $dst$$Register;
 8974     if (s != d) {
 8975       __ movq(d, s);
 8976     }
 8977     __ encode_heap_oop(d);
 8978   %}
 8979   ins_pipe(ialu_reg_long);
 8980 %}
 8981 
 8982 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8983   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8984   match(Set dst (EncodeP src));
 8985   effect(KILL cr);
 8986   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8987   ins_encode %{
 8988     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8989   %}
 8990   ins_pipe(ialu_reg_long);
 8991 %}
 8992 
 8993 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8994   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8995             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8996   match(Set dst (DecodeN src));
 8997   effect(KILL cr);
 8998   format %{ "decode_heap_oop $dst,$src" %}
 8999   ins_encode %{
 9000     Register s = $src$$Register;
 9001     Register d = $dst$$Register;
 9002     if (s != d) {
 9003       __ movq(d, s);
 9004     }
 9005     __ decode_heap_oop(d);
 9006   %}
 9007   ins_pipe(ialu_reg_long);
 9008 %}
 9009 
 9010 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9011   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9012             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9013   match(Set dst (DecodeN src));
 9014   effect(KILL cr);
 9015   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9016   ins_encode %{
 9017     Register s = $src$$Register;
 9018     Register d = $dst$$Register;
 9019     if (s != d) {
 9020       __ decode_heap_oop_not_null(d, s);
 9021     } else {
 9022       __ decode_heap_oop_not_null(d);
 9023     }
 9024   %}
 9025   ins_pipe(ialu_reg_long);
 9026 %}
 9027 
 9028 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9029   match(Set dst (EncodePKlass src));
 9030   effect(TEMP dst, KILL cr);
 9031   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9032   ins_encode %{
 9033     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9034   %}
 9035   ins_pipe(ialu_reg_long);
 9036 %}
 9037 
 9038 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9039   match(Set dst (DecodeNKlass src));
 9040   effect(TEMP dst, KILL cr);
 9041   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9042   ins_encode %{
 9043     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9044   %}
 9045   ins_pipe(ialu_reg_long);
 9046 %}
 9047 
 9048 //----------Conditional Move---------------------------------------------------
 9049 // Jump
// Dummy instruction for generating temp registers; predicate(false) below
// keeps it from ever being matched directly.
 9051 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9052   match(Jump (LShiftL switch_val shift));
 9053   ins_cost(350);
 9054   predicate(false);
 9055   effect(TEMP dest);
 9056 
 9057   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9058             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9059   ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs
    // r10 to do that, and the compiler treats r10 as an allocatable register.
    // So we build the addressing by hand.
 9063     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9064     // ArrayAddress dispatch(table, index);
 9065     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9066     __ lea($dest$$Register, $constantaddress);
 9067     __ jmp(dispatch);
 9068   %}
 9069   ins_pipe(pipe_jmp);
 9070 %}
 9071 
 9072 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9073   match(Jump (AddL (LShiftL switch_val shift) offset));
 9074   ins_cost(350);
 9075   effect(TEMP dest);
 9076 
 9077   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9078             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9079   ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs
    // r10 to do that, and the compiler treats r10 as an allocatable register.
    // So we build the addressing by hand.
 9083     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9084     // ArrayAddress dispatch(table, index);
 9085     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9086     __ lea($dest$$Register, $constantaddress);
 9087     __ jmp(dispatch);
 9088   %}
 9089   ins_pipe(pipe_jmp);
 9090 %}
 9091 
 9092 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9093   match(Jump switch_val);
 9094   ins_cost(350);
 9095   effect(TEMP dest);
 9096 
 9097   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9098             "jmp     [$dest + $switch_val]\n\t" %}
 9099   ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs
    // r10 to do that, and the compiler treats r10 as an allocatable register.
    // So we build the addressing by hand.
 9103     // Address index(noreg, switch_reg, Address::times_1);
 9104     // ArrayAddress dispatch(table, index);
 9105     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9106     __ lea($dest$$Register, $constantaddress);
 9107     __ jmp(dispatch);
 9108   %}
 9109   ins_pipe(pipe_jmp);
 9110 %}
 9111 
 9112 // Conditional move
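// When one CMove input is the constant 1 and the other is known to be the
// constant 0 (see the predicates on the *_imm_01* forms), the result is just
// the negated condition, so a single setcc replaces a constant load + cmov.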
 9113 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9114 %{
 9115   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9116   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9117 
 9118   ins_cost(100); // XXX
 9119   format %{ "setbn$cop $dst\t# signed, int" %}
 9120   ins_encode %{
 9121     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9122     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9123   %}
 9124   ins_pipe(ialu_reg);
 9125 %}
 9126 
 9127 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9128 %{
 9129   predicate(!UseAPX);
 9130   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9131 
 9132   ins_cost(200); // XXX
 9133   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9134   ins_encode %{
 9135     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9136   %}
 9137   ins_pipe(pipe_cmov_reg);
 9138 %}
 9139 
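// The *_ndd forms use Intel APX "new data destination" (NDD) encodings: the
// EVEX-extended instruction takes a separate destination register, so the
// legacy two-instruction pattern "mov dst, src1; cmovcc dst, src2" collapses
// into a single "ecmovcc dst, src1, src2".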
 9140 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9141 %{
 9142   predicate(UseAPX);
 9143   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9144 
 9145   ins_cost(200);
 9146   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9147   ins_encode %{
 9148     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9149   %}
 9150   ins_pipe(pipe_cmov_reg);
 9151 %}
 9152 
 9153 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9154 %{
 9155   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9156   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9157 
 9158   ins_cost(100); // XXX
 9159   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9160   ins_encode %{
 9161     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9162     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9163   %}
 9164   ins_pipe(ialu_reg);
 9165 %}
 9166 
 9167 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9168   predicate(!UseAPX);
 9169   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9170 
 9171   ins_cost(200); // XXX
 9172   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9173   ins_encode %{
 9174     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9175   %}
 9176   ins_pipe(pipe_cmov_reg);
 9177 %}
 9178 
 9179 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9180   predicate(UseAPX);
 9181   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9182 
 9183   ins_cost(200);
 9184   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9185   ins_encode %{
 9186     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9187   %}
 9188   ins_pipe(pipe_cmov_reg);
 9189 %}
 9190 
 9191 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9192 %{
 9193   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9194   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9195 
 9196   ins_cost(100); // XXX
 9197   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9198   ins_encode %{
 9199     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9200     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9201   %}
 9202   ins_pipe(ialu_reg);
 9203 %}
 9204 
 9205 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9206   predicate(!UseAPX);
 9207   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9208   ins_cost(200);
 9209   expand %{
 9210     cmovI_regU(cop, cr, dst, src);
 9211   %}
 9212 %}
 9213 
 9214 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
 9215   predicate(UseAPX);
 9216   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9217   ins_cost(200);
 9218   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9219   ins_encode %{
 9220     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9221   %}
 9222   ins_pipe(pipe_cmov_reg);
 9223 %}
 9224 
 9225 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9226   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9227   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9228 
 9229   ins_cost(200); // XXX
 9230   format %{ "cmovpl  $dst, $src\n\t"
 9231             "cmovnel $dst, $src" %}
 9232   ins_encode %{
 9233     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9234     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9235   %}
 9236   ins_pipe(pipe_cmov_reg);
 9237 %}
 9238 
 9239 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9240   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9241   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9242   effect(TEMP dst);
 9243 
 9244   ins_cost(200);
 9245   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9246             "cmovnel  $dst, $src2" %}
 9247   ins_encode %{
 9248     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9249     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9250   %}
 9251   ins_pipe(pipe_cmov_reg);
 9252 %}
 9253 
 9254 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9255 // inputs of the CMove
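// Example: dst = (x == y) ? dst : src  computes the same value as
//          dst = (x != y) ? src : dst,
// so the eq forms swap the two CMove inputs in the match rule and reuse the
// cmovp/cmovne sequence of the ne forms.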
 9256 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9257   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9258   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9259   effect(TEMP dst);
 9260 
 9261   ins_cost(200); // XXX
 9262   format %{ "cmovpl  $dst, $src\n\t"
 9263             "cmovnel $dst, $src" %}
 9264   ins_encode %{
 9265     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9266     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9267   %}
 9268   ins_pipe(pipe_cmov_reg);
 9269 %}
 9270 
// This special handling is needed only for eq/ne comparisons, since
// NaN == NaN is false and the parity flag is set if either operand is NaN.
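// After ucomiss/ucomisd, an unordered result (either operand NaN) sets ZF,
// PF and CF to 1, so equality cannot be decided from ZF alone:
//   cmovp   dst, src   // unordered         -> treat as "not equal"
//   cmovne  dst, src   // ordered, ZF == 0  -> genuinely not equal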
 9273 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9274   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9275   match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
 9276   effect(TEMP dst);
 9277 
 9278   ins_cost(200);
 9279   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9280             "cmovnel  $dst, $src2" %}
 9281   ins_encode %{
 9282     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9283     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9284   %}
 9285   ins_pipe(pipe_cmov_reg);
 9286 %}
 9287 
 9288 // Conditional move
 9289 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9290   predicate(!UseAPX);
 9291   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9292 
 9293   ins_cost(250); // XXX
 9294   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9295   ins_encode %{
 9296     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9297   %}
 9298   ins_pipe(pipe_cmov_mem);
 9299 %}
 9300 
 9301 // Conditional move
 9302 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9303 %{
 9304   predicate(UseAPX);
 9305   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9306 
 9307   ins_cost(250);
 9308   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9309   ins_encode %{
 9310     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9311   %}
 9312   ins_pipe(pipe_cmov_mem);
 9313 %}
 9314 
 9315 // Conditional move
 9316 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9317 %{
 9318   predicate(!UseAPX);
 9319   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9320 
 9321   ins_cost(250); // XXX
 9322   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9323   ins_encode %{
 9324     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9325   %}
 9326   ins_pipe(pipe_cmov_mem);
 9327 %}
 9328 
 9329 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9330   predicate(!UseAPX);
 9331   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9332   ins_cost(250);
 9333   expand %{
 9334     cmovI_memU(cop, cr, dst, src);
 9335   %}
 9336 %}
 9337 
 9338 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9339 %{
 9340   predicate(UseAPX);
 9341   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9342 
 9343   ins_cost(250);
 9344   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9345   ins_encode %{
 9346     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9347   %}
 9348   ins_pipe(pipe_cmov_mem);
 9349 %}
 9350 
 9351 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
 9352 %{
 9353   predicate(UseAPX);
 9354   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9355   ins_cost(250);
 9356   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9357   ins_encode %{
 9358     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9359   %}
 9360   ins_pipe(pipe_cmov_mem);
 9361 %}
 9362 
 9363 // Conditional move
 9364 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9365 %{
 9366   predicate(!UseAPX);
 9367   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9368 
 9369   ins_cost(200); // XXX
 9370   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9371   ins_encode %{
 9372     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9373   %}
 9374   ins_pipe(pipe_cmov_reg);
 9375 %}
 9376 
 9377 // Conditional move ndd
 9378 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9379 %{
 9380   predicate(UseAPX);
 9381   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9382 
 9383   ins_cost(200);
 9384   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9385   ins_encode %{
 9386     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9387   %}
 9388   ins_pipe(pipe_cmov_reg);
 9389 %}
 9390 
 9391 // Conditional move
 9392 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9393 %{
 9394   predicate(!UseAPX);
 9395   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9396 
 9397   ins_cost(200); // XXX
 9398   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9399   ins_encode %{
 9400     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9401   %}
 9402   ins_pipe(pipe_cmov_reg);
 9403 %}
 9404 
 9405 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9406   predicate(!UseAPX);
 9407   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9408   ins_cost(200);
 9409   expand %{
 9410     cmovN_regU(cop, cr, dst, src);
 9411   %}
 9412 %}
 9413 
 9414 // Conditional move ndd
 9415 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9416 %{
 9417   predicate(UseAPX);
 9418   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9419 
 9420   ins_cost(200);
 9421   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9422   ins_encode %{
 9423     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9424   %}
 9425   ins_pipe(pipe_cmov_reg);
 9426 %}
 9427 
 9428 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
 9429   predicate(UseAPX);
 9430   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9431   ins_cost(200);
 9432   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9433   ins_encode %{
 9434     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9435   %}
 9436   ins_pipe(pipe_cmov_reg);
 9437 %}
 9438 
 9439 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9440   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9441   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9442 
 9443   ins_cost(200); // XXX
 9444   format %{ "cmovpl  $dst, $src\n\t"
 9445             "cmovnel $dst, $src" %}
 9446   ins_encode %{
 9447     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9448     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9449   %}
 9450   ins_pipe(pipe_cmov_reg);
 9451 %}
 9452 
 9453 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9454 // inputs of the CMove
 9455 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9456   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9457   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9458 
 9459   ins_cost(200); // XXX
 9460   format %{ "cmovpl  $dst, $src\n\t"
 9461             "cmovnel $dst, $src" %}
 9462   ins_encode %{
 9463     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9464     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9465   %}
 9466   ins_pipe(pipe_cmov_reg);
 9467 %}
 9468 
 9469 // Conditional move
 9470 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9471 %{
 9472   predicate(!UseAPX);
 9473   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9474 
 9475   ins_cost(200); // XXX
 9476   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9477   ins_encode %{
 9478     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9479   %}
 9480   ins_pipe(pipe_cmov_reg);  // XXX
 9481 %}
 9482 
 9483 // Conditional move ndd
 9484 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9485 %{
 9486   predicate(UseAPX);
 9487   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9488 
 9489   ins_cost(200);
 9490   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9491   ins_encode %{
 9492     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9493   %}
 9494   ins_pipe(pipe_cmov_reg);
 9495 %}
 9496 
 9497 // Conditional move
 9498 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9499 %{
 9500   predicate(!UseAPX);
 9501   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9502 
 9503   ins_cost(200); // XXX
 9504   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9505   ins_encode %{
 9506     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9507   %}
 9508   ins_pipe(pipe_cmov_reg); // XXX
 9509 %}
 9510 
 9511 // Conditional move ndd
 9512 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9513 %{
 9514   predicate(UseAPX);
 9515   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9516 
 9517   ins_cost(200);
 9518   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9519   ins_encode %{
 9520     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9521   %}
 9522   ins_pipe(pipe_cmov_reg);
 9523 %}
 9524 
 9525 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9526   predicate(!UseAPX);
 9527   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9528   ins_cost(200);
 9529   expand %{
 9530     cmovP_regU(cop, cr, dst, src);
 9531   %}
 9532 %}
 9533 
 9534 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
 9535   predicate(UseAPX);
 9536   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9537   ins_cost(200);
 9538   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9539   ins_encode %{
 9540     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9541   %}
 9542   ins_pipe(pipe_cmov_reg);
 9543 %}
 9544 
 9545 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9546   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9547   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9548 
 9549   ins_cost(200); // XXX
 9550   format %{ "cmovpq  $dst, $src\n\t"
 9551             "cmovneq $dst, $src" %}
 9552   ins_encode %{
 9553     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9554     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9555   %}
 9556   ins_pipe(pipe_cmov_reg);
 9557 %}
 9558 
 9559 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9560   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9561   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9562   effect(TEMP dst);
 9563 
 9564   ins_cost(200);
 9565   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9566             "cmovneq  $dst, $src2" %}
 9567   ins_encode %{
 9568     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9569     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9570   %}
 9571   ins_pipe(pipe_cmov_reg);
 9572 %}
 9573 
 9574 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9575 // inputs of the CMove
 9576 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9577   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9578   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9579 
 9580   ins_cost(200); // XXX
 9581   format %{ "cmovpq  $dst, $src\n\t"
 9582             "cmovneq $dst, $src" %}
 9583   ins_encode %{
 9584     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9585     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9586   %}
 9587   ins_pipe(pipe_cmov_reg);
 9588 %}
 9589 
 9590 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9591   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9592   match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
 9593   effect(TEMP dst);
 9594 
 9595   ins_cost(200);
 9596   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9597             "cmovneq  $dst, $src2" %}
 9598   ins_encode %{
 9599     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9600     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9601   %}
 9602   ins_pipe(pipe_cmov_reg);
 9603 %}
 9604 
 9605 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9606 %{
 9607   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9608   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9609 
 9610   ins_cost(100); // XXX
 9611   format %{ "setbn$cop $dst\t# signed, long" %}
 9612   ins_encode %{
 9613     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9614     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9615   %}
 9616   ins_pipe(ialu_reg);
 9617 %}
 9618 
 9619 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9620 %{
 9621   predicate(!UseAPX);
 9622   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9623 
 9624   ins_cost(200); // XXX
 9625   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9626   ins_encode %{
 9627     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9628   %}
 9629   ins_pipe(pipe_cmov_reg);  // XXX
 9630 %}
 9631 
 9632 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9633 %{
 9634   predicate(UseAPX);
 9635   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9636 
 9637   ins_cost(200);
 9638   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9639   ins_encode %{
 9640     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9641   %}
 9642   ins_pipe(pipe_cmov_reg);
 9643 %}
 9644 
 9645 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9646 %{
 9647   predicate(!UseAPX);
 9648   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9649 
 9650   ins_cost(200); // XXX
 9651   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9652   ins_encode %{
 9653     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9654   %}
 9655   ins_pipe(pipe_cmov_mem);  // XXX
 9656 %}
 9657 
 9658 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9659 %{
 9660   predicate(UseAPX);
 9661   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9662 
 9663   ins_cost(200);
 9664   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9665   ins_encode %{
 9666     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9667   %}
 9668   ins_pipe(pipe_cmov_mem);
 9669 %}
 9670 
 9671 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9672 %{
 9673   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9674   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9675 
 9676   ins_cost(100); // XXX
 9677   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9678   ins_encode %{
 9679     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9680     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9681   %}
 9682   ins_pipe(ialu_reg);
 9683 %}
 9684 
 9685 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9686 %{
 9687   predicate(!UseAPX);
 9688   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9689 
 9690   ins_cost(200); // XXX
 9691   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9692   ins_encode %{
 9693     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9694   %}
 9695   ins_pipe(pipe_cmov_reg); // XXX
 9696 %}
 9697 
 9698 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9699 %{
 9700   predicate(UseAPX);
 9701   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9702 
 9703   ins_cost(200);
 9704   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9705   ins_encode %{
 9706     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9707   %}
 9708   ins_pipe(pipe_cmov_reg);
 9709 %}
 9710 
 9711 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9712 %{
 9713   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9714   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9715 
 9716   ins_cost(100); // XXX
 9717   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9718   ins_encode %{
 9719     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9720     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9721   %}
 9722   ins_pipe(ialu_reg);
 9723 %}
 9724 
 9725 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9726   predicate(!UseAPX);
 9727   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9728   ins_cost(200);
 9729   expand %{
 9730     cmovL_regU(cop, cr, dst, src);
 9731   %}
 9732 %}
 9733 
 9734 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
 9735 %{
 9736   predicate(UseAPX);
 9737   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9738   ins_cost(200);
 9739   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9740   ins_encode %{
 9741     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9742   %}
 9743   ins_pipe(pipe_cmov_reg);
 9744 %}
 9745 
 9746 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9747   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9748   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9749 
 9750   ins_cost(200); // XXX
 9751   format %{ "cmovpq  $dst, $src\n\t"
 9752             "cmovneq $dst, $src" %}
 9753   ins_encode %{
 9754     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9755     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9756   %}
 9757   ins_pipe(pipe_cmov_reg);
 9758 %}
 9759 
 9760 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9761   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9762   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9763   effect(TEMP dst);
 9764 
 9765   ins_cost(200);
 9766   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9767             "cmovneq  $dst, $src2" %}
 9768   ins_encode %{
 9769     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9770     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9771   %}
 9772   ins_pipe(pipe_cmov_reg);
 9773 %}
 9774 
 9775 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9776 // inputs of the CMove
 9777 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9778   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9779   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9780 
 9781   ins_cost(200); // XXX
 9782   format %{ "cmovpq  $dst, $src\n\t"
 9783             "cmovneq $dst, $src" %}
 9784   ins_encode %{
 9785     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9786     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9787   %}
 9788   ins_pipe(pipe_cmov_reg);
 9789 %}
 9790 
 9791 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9792   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9793   match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
 9794   effect(TEMP dst);
 9795 
 9796   ins_cost(200);
 9797   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
            "cmovneq  $dst, $src2" %}
 9799   ins_encode %{
 9800     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9801     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9802   %}
 9803   ins_pipe(pipe_cmov_reg);
 9804 %}
 9805 
 9806 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9807 %{
 9808   predicate(!UseAPX);
 9809   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9810 
 9811   ins_cost(200); // XXX
 9812   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9813   ins_encode %{
 9814     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9815   %}
 9816   ins_pipe(pipe_cmov_mem); // XXX
 9817 %}
 9818 
 9819 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9820   predicate(!UseAPX);
 9821   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9822   ins_cost(200);
 9823   expand %{
 9824     cmovL_memU(cop, cr, dst, src);
 9825   %}
 9826 %}
 9827 
 9828 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9829 %{
 9830   predicate(UseAPX);
 9831   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9832 
 9833   ins_cost(200);
 9834   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9835   ins_encode %{
 9836     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9837   %}
 9838   ins_pipe(pipe_cmov_mem);
 9839 %}
 9840 
 9841 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
 9842 %{
 9843   predicate(UseAPX);
 9844   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9845   ins_cost(200);
 9846   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9847   ins_encode %{
 9848     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9849   %}
 9850   ins_pipe(pipe_cmov_mem);
 9851 %}
 9852 
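// There is no conditional move between XMM registers, so CMoveF/CMoveD are
// implemented as a short forward branch around a register-to-register copy.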
 9853 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9854 %{
 9855   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9856 
 9857   ins_cost(200); // XXX
 9858   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9859             "movss     $dst, $src\n"
 9860     "skip:" %}
 9861   ins_encode %{
 9862     Label Lskip;
 9863     // Invert sense of branch from sense of CMOV
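    // (x86 condition codes come in complementary pairs differing only in
    // bit 0, so cmpcode ^ 1 is the negated condition.)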
 9864     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9865     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9866     __ bind(Lskip);
 9867   %}
 9868   ins_pipe(pipe_slow);
 9869 %}
 9870 
 9871 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9872 %{
 9873   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9874 
 9875   ins_cost(200); // XXX
 9876   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9877             "movss     $dst, $src\n"
 9878     "skip:" %}
 9879   ins_encode %{
 9880     Label Lskip;
 9881     // Invert sense of branch from sense of CMOV
 9882     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9883     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9884     __ bind(Lskip);
 9885   %}
 9886   ins_pipe(pipe_slow);
 9887 %}
 9888 
 9889 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9890   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9891   ins_cost(200);
 9892   expand %{
 9893     cmovF_regU(cop, cr, dst, src);
 9894   %}
 9895 %}
 9896 
 9897 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9898 %{
 9899   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9900 
 9901   ins_cost(200); // XXX
 9902   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9903             "movsd     $dst, $src\n"
 9904     "skip:" %}
 9905   ins_encode %{
 9906     Label Lskip;
 9907     // Invert sense of branch from sense of CMOV
 9908     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9909     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9910     __ bind(Lskip);
 9911   %}
 9912   ins_pipe(pipe_slow);
 9913 %}
 9914 
 9915 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9916 %{
 9917   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9918 
 9919   ins_cost(200); // XXX
 9920   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9921             "movsd     $dst, $src\n"
 9922     "skip:" %}
 9923   ins_encode %{
 9924     Label Lskip;
 9925     // Invert sense of branch from sense of CMOV
 9926     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9927     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9928     __ bind(Lskip);
 9929   %}
 9930   ins_pipe(pipe_slow);
 9931 %}
 9932 
 9933 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9934   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9935   ins_cost(200);
 9936   expand %{
 9937     cmovD_regU(cop, cr, dst, src);
 9938   %}
 9939 %}
 9940 
 9941 //----------Arithmetic Instructions--------------------------------------------
 9942 //----------Addition Instructions----------------------------------------------
 9943 
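// The flag(PD::Flag_sets_*) annotations record which EFLAGS bits an
// instruction defines, so that passes tracking flag definitions can reuse
// them; the Flag_ndd_demotable_opr* annotations mark ndd forms that may be
// demoted to the shorter legacy two-operand encoding when the destination
// lands in the same register as that source operand.
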
 9944 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9945 %{
 9946   predicate(!UseAPX);
 9947   match(Set dst (AddI dst src));
 9948   effect(KILL cr);
 9949   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9950   format %{ "addl    $dst, $src\t# int" %}
 9951   ins_encode %{
 9952     __ addl($dst$$Register, $src$$Register);
 9953   %}
 9954   ins_pipe(ialu_reg_reg);
 9955 %}
 9956 
 9957 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9958 %{
 9959   predicate(UseAPX);
 9960   match(Set dst (AddI src1 src2));
 9961   effect(KILL cr);
 9962   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9963 
 9964   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9965   ins_encode %{
 9966     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9967   %}
 9968   ins_pipe(ialu_reg_reg);
 9969 %}
 9970 
 9971 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9972 %{
 9973   predicate(!UseAPX);
 9974   match(Set dst (AddI dst src));
 9975   effect(KILL cr);
 9976   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9977 
 9978   format %{ "addl    $dst, $src\t# int" %}
 9979   ins_encode %{
 9980     __ addl($dst$$Register, $src$$constant);
 9981   %}
 9982   ins_pipe( ialu_reg );
 9983 %}
 9984 
 9985 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9986 %{
 9987   predicate(UseAPX);
 9988   match(Set dst (AddI src1 src2));
 9989   effect(KILL cr);
 9990   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9991 
 9992   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9993   ins_encode %{
 9994     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9995   %}
 9996   ins_pipe( ialu_reg );
 9997 %}
 9998 
 9999 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
10000 %{
10001   predicate(UseAPX);
10002   match(Set dst (AddI (LoadI src1) src2));
10003   effect(KILL cr);
10004   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10005 
10006   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10007   ins_encode %{
10008     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10009   %}
10010   ins_pipe( ialu_reg );
10011 %}
10012 
10013 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10014 %{
10015   predicate(!UseAPX);
10016   match(Set dst (AddI dst (LoadI src)));
10017   effect(KILL cr);
10018   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10019 
10020   ins_cost(150); // XXX
10021   format %{ "addl    $dst, $src\t# int" %}
10022   ins_encode %{
10023     __ addl($dst$$Register, $src$$Address);
10024   %}
10025   ins_pipe(ialu_reg_mem);
10026 %}
10027 
10028 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10029 %{
10030   predicate(UseAPX);
10031   match(Set dst (AddI src1 (LoadI src2)));
10032   effect(KILL cr);
10033   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10034 
10035   ins_cost(150);
10036   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10037   ins_encode %{
10038     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10039   %}
10040   ins_pipe(ialu_reg_mem);
10041 %}
10042 
10043 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10044 %{
10045   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10046   effect(KILL cr);
10047   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10048 
10049   ins_cost(150); // XXX
10050   format %{ "addl    $dst, $src\t# int" %}
10051   ins_encode %{
10052     __ addl($dst$$Address, $src$$Register);
10053   %}
10054   ins_pipe(ialu_mem_reg);
10055 %}
10056 
10057 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10058 %{
10059   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10060   effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

10064   ins_cost(125); // XXX
10065   format %{ "addl    $dst, $src\t# int" %}
10066   ins_encode %{
10067     __ addl($dst$$Address, $src$$constant);
10068   %}
10069   ins_pipe(ialu_mem_imm);
10070 %}
10071 
10072 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10073 %{
10074   predicate(!UseAPX && UseIncDec);
10075   match(Set dst (AddI dst src));
10076   effect(KILL cr);
10077 
10078   format %{ "incl    $dst\t# int" %}
10079   ins_encode %{
10080     __ incrementl($dst$$Register);
10081   %}
10082   ins_pipe(ialu_reg);
10083 %}
10084 
10085 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10086 %{
10087   predicate(UseAPX && UseIncDec);
10088   match(Set dst (AddI src val));
10089   effect(KILL cr);
10090   flag(PD::Flag_ndd_demotable_opr1);
10091 
10092   format %{ "eincl    $dst, $src\t# int ndd" %}
10093   ins_encode %{
10094     __ eincl($dst$$Register, $src$$Register, false);
10095   %}
10096   ins_pipe(ialu_reg);
10097 %}
10098 
10099 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10100 %{
10101   predicate(UseAPX && UseIncDec);
10102   match(Set dst (AddI (LoadI src) val));
10103   effect(KILL cr);
10104 
10105   format %{ "eincl    $dst, $src\t# int ndd" %}
10106   ins_encode %{
10107     __ eincl($dst$$Register, $src$$Address, false);
10108   %}
10109   ins_pipe(ialu_reg);
10110 %}
10111 
10112 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10113 %{
10114   predicate(UseIncDec);
10115   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10116   effect(KILL cr);
10117 
10118   ins_cost(125); // XXX
10119   format %{ "incl    $dst\t# int" %}
10120   ins_encode %{
10121     __ incrementl($dst$$Address);
10122   %}
10123   ins_pipe(ialu_mem_imm);
10124 %}
10125 
// Decrement uses AddI because C2 canonicalizes x - con into x + (-con).
10127 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10128 %{
10129   predicate(!UseAPX && UseIncDec);
10130   match(Set dst (AddI dst src));
10131   effect(KILL cr);
10132 
10133   format %{ "decl    $dst\t# int" %}
10134   ins_encode %{
10135     __ decrementl($dst$$Register);
10136   %}
10137   ins_pipe(ialu_reg);
10138 %}
10139 
10140 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10141 %{
10142   predicate(UseAPX && UseIncDec);
10143   match(Set dst (AddI src val));
10144   effect(KILL cr);
10145   flag(PD::Flag_ndd_demotable_opr1);
10146 
10147   format %{ "edecl    $dst, $src\t# int ndd" %}
10148   ins_encode %{
10149     __ edecl($dst$$Register, $src$$Register, false);
10150   %}
10151   ins_pipe(ialu_reg);
10152 %}
10153 
10154 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10155 %{
10156   predicate(UseAPX && UseIncDec);
10157   match(Set dst (AddI (LoadI src) val));
10158   effect(KILL cr);
10159 
10160   format %{ "edecl    $dst, $src\t# int ndd" %}
10161   ins_encode %{
10162     __ edecl($dst$$Register, $src$$Address, false);
10163   %}
10164   ins_pipe(ialu_reg);
10165 %}
10166 
// Decrement uses AddI because C2 canonicalizes x - con into x + (-con).
10168 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10169 %{
10170   predicate(UseIncDec);
10171   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10172   effect(KILL cr);
10173 
10174   ins_cost(125); // XXX
10175   format %{ "decl    $dst\t# int" %}
10176   ins_encode %{
10177     __ decrementl($dst$$Address);
10178   %}
10179   ins_pipe(ialu_mem_imm);
10180 %}
10181 
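// LEA strength reduction: where VM_Version::supports_fast_2op_lea() /
// supports_fast_3op_lea() report that complex LEA is fast, shift-add shapes
// such as  dst = base + (index << scale) + disp  fold into a single
//   leal/leaq dst, [base + index * 2^scale + disp]
// without touching the flags (note: no KILL cr effect on these forms).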
10182 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10183 %{
10184   predicate(VM_Version::supports_fast_2op_lea());
10185   match(Set dst (AddI (LShiftI index scale) disp));
10186 
10187   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10188   ins_encode %{
10189     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10190     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10191   %}
10192   ins_pipe(ialu_reg_reg);
10193 %}
10194 
10195 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10196 %{
10197   predicate(VM_Version::supports_fast_3op_lea());
10198   match(Set dst (AddI (AddI base index) disp));
10199 
10200   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10201   ins_encode %{
10202     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10203   %}
10204   ins_pipe(ialu_reg_reg);
10205 %}
10206 
10207 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10208 %{
10209   predicate(VM_Version::supports_fast_2op_lea());
10210   match(Set dst (AddI base (LShiftI index scale)));
10211 
10212   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10213   ins_encode %{
10214     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10215     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10216   %}
10217   ins_pipe(ialu_reg_reg);
10218 %}
10219 
10220 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10221 %{
10222   predicate(VM_Version::supports_fast_3op_lea());
10223   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10224 
10225   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10226   ins_encode %{
10227     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10228     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10229   %}
10230   ins_pipe(ialu_reg_reg);
10231 %}
10232 
10233 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10234 %{
10235   predicate(!UseAPX);
10236   match(Set dst (AddL dst src));
10237   effect(KILL cr);
10238   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10239 
10240   format %{ "addq    $dst, $src\t# long" %}
10241   ins_encode %{
10242     __ addq($dst$$Register, $src$$Register);
10243   %}
10244   ins_pipe(ialu_reg_reg);
10245 %}
10246 
10247 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10248 %{
10249   predicate(UseAPX);
10250   match(Set dst (AddL src1 src2));
10251   effect(KILL cr);
10252   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10253 
10254   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10255   ins_encode %{
10256     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10257   %}
10258   ins_pipe(ialu_reg_reg);
10259 %}
10260 
10261 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10262 %{
10263   predicate(!UseAPX);
10264   match(Set dst (AddL dst src));
10265   effect(KILL cr);
10266   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10267 
10268   format %{ "addq    $dst, $src\t# long" %}
10269   ins_encode %{
10270     __ addq($dst$$Register, $src$$constant);
10271   %}
10272   ins_pipe( ialu_reg );
10273 %}
10274 
10275 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10276 %{
10277   predicate(UseAPX);
10278   match(Set dst (AddL src1 src2));
10279   effect(KILL cr);
10280   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10281 
10282   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10283   ins_encode %{
10284     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10285   %}
10286   ins_pipe( ialu_reg );
10287 %}
10288 
10289 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10290 %{
10291   predicate(UseAPX);
10292   match(Set dst (AddL (LoadL src1) src2));
10293   effect(KILL cr);
10294   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10295 
10296   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10297   ins_encode %{
10298     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10299   %}
10300   ins_pipe( ialu_reg );
10301 %}
10302 
10303 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10304 %{
10305   predicate(!UseAPX);
10306   match(Set dst (AddL dst (LoadL src)));
10307   effect(KILL cr);
10308   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10309 
10310   ins_cost(150); // XXX
10311   format %{ "addq    $dst, $src\t# long" %}
10312   ins_encode %{
10313     __ addq($dst$$Register, $src$$Address);
10314   %}
10315   ins_pipe(ialu_reg_mem);
10316 %}
10317 
10318 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10319 %{
10320   predicate(UseAPX);
10321   match(Set dst (AddL src1 (LoadL src2)));
10322   effect(KILL cr);
10323   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10324 
10325   ins_cost(150);
10326   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10327   ins_encode %{
10328     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10329   %}
10330   ins_pipe(ialu_reg_mem);
10331 %}
10332 
10333 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10334 %{
10335   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10336   effect(KILL cr);
10337   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10338 
10339   ins_cost(150); // XXX
10340   format %{ "addq    $dst, $src\t# long" %}
10341   ins_encode %{
10342     __ addq($dst$$Address, $src$$Register);
10343   %}
10344   ins_pipe(ialu_mem_reg);
10345 %}
10346 
10347 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10348 %{
10349   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10350   effect(KILL cr);
10351   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10352 
10353   ins_cost(125); // XXX
10354   format %{ "addq    $dst, $src\t# long" %}
10355   ins_encode %{
10356     __ addq($dst$$Address, $src$$constant);
10357   %}
10358   ins_pipe(ialu_mem_imm);
10359 %}
10360 
10361 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10362 %{
10363   predicate(!UseAPX && UseIncDec);
10364   match(Set dst (AddL dst src));
10365   effect(KILL cr);
10366 
10367   format %{ "incq    $dst\t# long" %}
10368   ins_encode %{
10369     __ incrementq($dst$$Register);
10370   %}
10371   ins_pipe(ialu_reg);
10372 %}
10373 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10375 %{
10376   predicate(UseAPX && UseIncDec);
10377   match(Set dst (AddL src val));
10378   effect(KILL cr);
10379   flag(PD::Flag_ndd_demotable_opr1);
10380 
10381   format %{ "eincq    $dst, $src\t# long ndd" %}
10382   ins_encode %{
10383     __ eincq($dst$$Register, $src$$Register, false);
10384   %}
10385   ins_pipe(ialu_reg);
10386 %}
10387 
10388 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10389 %{
10390   predicate(UseAPX && UseIncDec);
10391   match(Set dst (AddL (LoadL src) val));
10392   effect(KILL cr);
10393 
10394   format %{ "eincq    $dst, $src\t# long ndd" %}
10395   ins_encode %{
10396     __ eincq($dst$$Register, $src$$Address, false);
10397   %}
10398   ins_pipe(ialu_reg);
10399 %}
10400 
10401 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10402 %{
10403   predicate(UseIncDec);
10404   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10405   effect(KILL cr);
10406 
10407   ins_cost(125); // XXX
10408   format %{ "incq    $dst\t# long" %}
10409   ins_encode %{
10410     __ incrementq($dst$$Address);
10411   %}
10412   ins_pipe(ialu_mem_imm);
10413 %}
10414 
// Decrement uses AddL because C2 canonicalizes x - con into x + (-con).
10416 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10417 %{
10418   predicate(!UseAPX && UseIncDec);
10419   match(Set dst (AddL dst src));
10420   effect(KILL cr);
10421 
10422   format %{ "decq    $dst\t# long" %}
10423   ins_encode %{
10424     __ decrementq($dst$$Register);
10425   %}
10426   ins_pipe(ialu_reg);
10427 %}
10428 
10429 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10430 %{
10431   predicate(UseAPX && UseIncDec);
10432   match(Set dst (AddL src val));
10433   effect(KILL cr);
10434   flag(PD::Flag_ndd_demotable_opr1);
10435 
10436   format %{ "edecq    $dst, $src\t# long ndd" %}
10437   ins_encode %{
10438     __ edecq($dst$$Register, $src$$Register, false);
10439   %}
10440   ins_pipe(ialu_reg);
10441 %}
10442 
10443 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10444 %{
10445   predicate(UseAPX && UseIncDec);
10446   match(Set dst (AddL (LoadL src) val));
10447   effect(KILL cr);
10448 
10449   format %{ "edecq    $dst, $src\t# long ndd" %}
10450   ins_encode %{
10451     __ edecq($dst$$Register, $src$$Address, false);
10452   %}
10453   ins_pipe(ialu_reg);
10454 %}
10455 
// Decrement uses AddL because C2 canonicalizes x - con into x + (-con).
10457 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10458 %{
10459   predicate(UseIncDec);
10460   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10461   effect(KILL cr);
10462 
10463   ins_cost(125); // XXX
10464   format %{ "decq    $dst\t# long" %}
10465   ins_encode %{
10466     __ decrementq($dst$$Address);
10467   %}
10468   ins_pipe(ialu_mem_imm);
10469 %}
10470 
10471 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10472 %{
10473   predicate(VM_Version::supports_fast_2op_lea());
10474   match(Set dst (AddL (LShiftL index scale) disp));
10475 
10476   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10477   ins_encode %{
10478     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10479     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10480   %}
10481   ins_pipe(ialu_reg_reg);
10482 %}
10483 
10484 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10485 %{
10486   predicate(VM_Version::supports_fast_3op_lea());
10487   match(Set dst (AddL (AddL base index) disp));
10488 
10489   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10490   ins_encode %{
10491     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10492   %}
10493   ins_pipe(ialu_reg_reg);
10494 %}
10495 
10496 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10497 %{
10498   predicate(VM_Version::supports_fast_2op_lea());
10499   match(Set dst (AddL base (LShiftL index scale)));
10500 
10501   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10502   ins_encode %{
10503     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10504     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10505   %}
10506   ins_pipe(ialu_reg_reg);
10507 %}
10508 
10509 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10510 %{
10511   predicate(VM_Version::supports_fast_3op_lea());
10512   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10513 
10514   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10515   ins_encode %{
10516     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10517     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10518   %}
10519   ins_pipe(ialu_reg_reg);
10520 %}
10521 
10522 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10523 %{
10524   match(Set dst (AddP dst src));
10525   effect(KILL cr);
10526   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10527 
10528   format %{ "addq    $dst, $src\t# ptr" %}
10529   ins_encode %{
10530     __ addq($dst$$Register, $src$$Register);
10531   %}
10532   ins_pipe(ialu_reg_reg);
10533 %}
10534 
10535 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10536 %{
10537   match(Set dst (AddP dst src));
10538   effect(KILL cr);
10539   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10540 
10541   format %{ "addq    $dst, $src\t# ptr" %}
10542   ins_encode %{
10543     __ addq($dst$$Register, $src$$constant);
10544   %}
10545   ins_pipe( ialu_reg );
10546 %}
10547 
10548 // XXX addP mem ops ????
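
// The cast patterns below (CheckCastPP, CastPP, CastII, ...) are zero-size:
// size(0) with an empty encoding. They exist only so the compiler's
// type-narrowing cast nodes have a match; no code is emitted. The checked
// variants, selected when VerifyConstraintCasts is enabled, instead emit
// range-verification code and therefore clobber the flags.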
10549 
10550 instruct checkCastPP(rRegP dst)
10551 %{
10552   match(Set dst (CheckCastPP dst));
10553 
10554   size(0);
10555   format %{ "# checkcastPP of $dst" %}
10556   ins_encode(/* empty encoding */);
10557   ins_pipe(empty);
10558 %}
10559 
10560 instruct castPP(rRegP dst)
10561 %{
10562   match(Set dst (CastPP dst));
10563 
10564   size(0);
10565   format %{ "# castPP of $dst" %}
10566   ins_encode(/* empty encoding */);
10567   ins_pipe(empty);
10568 %}
10569 
10570 instruct castII(rRegI dst)
10571 %{
10572   predicate(VerifyConstraintCasts == 0);
10573   match(Set dst (CastII dst));
10574 
10575   size(0);
10576   format %{ "# castII of $dst" %}
10577   ins_encode(/* empty encoding */);
10578   ins_cost(0);
10579   ins_pipe(empty);
10580 %}
10581 
10582 instruct castII_checked(rRegI dst, rFlagsReg cr)
10583 %{
10584   predicate(VerifyConstraintCasts > 0);
10585   match(Set dst (CastII dst));
10586 
10587   effect(KILL cr);
10588   format %{ "# cast_checked_II $dst" %}
10589   ins_encode %{
10590     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10591   %}
10592   ins_pipe(pipe_slow);
10593 %}
10594 
10595 instruct castLL(rRegL dst)
10596 %{
10597   predicate(VerifyConstraintCasts == 0);
10598   match(Set dst (CastLL dst));
10599 
10600   size(0);
10601   format %{ "# castLL of $dst" %}
10602   ins_encode(/* empty encoding */);
10603   ins_cost(0);
10604   ins_pipe(empty);
10605 %}
10606 
10607 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10608 %{
10609   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10610   match(Set dst (CastLL dst));
10611 
10612   effect(KILL cr);
10613   format %{ "# cast_checked_LL $dst" %}
10614   ins_encode %{
10615     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10616   %}
10617   ins_pipe(pipe_slow);
10618 %}
10619 
10620 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10621 %{
10622   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10623   match(Set dst (CastLL dst));
10624 
10625   effect(KILL cr, TEMP tmp);
10626   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10627   ins_encode %{
10628     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10629   %}
10630   ins_pipe(pipe_slow);
10631 %}
10632 
10633 instruct castFF(regF dst)
10634 %{
10635   match(Set dst (CastFF dst));
10636 
10637   size(0);
10638   format %{ "# castFF of $dst" %}
10639   ins_encode(/* empty encoding */);
10640   ins_cost(0);
10641   ins_pipe(empty);
10642 %}
10643 
10644 instruct castHH(regF dst)
10645 %{
10646   match(Set dst (CastHH dst));
10647 
10648   size(0);
10649   format %{ "# castHH of $dst" %}
10650   ins_encode(/* empty encoding */);
10651   ins_cost(0);
10652   ins_pipe(empty);
10653 %}
10654 
10655 instruct castDD(regD dst)
10656 %{
10657   match(Set dst (CastDD dst));
10658 
10659   size(0);
10660   format %{ "# castDD of $dst" %}
10661   ins_encode(/* empty encoding */);
10662   ins_cost(0);
10663   ins_pipe(empty);
10664 %}
10665 
10666 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
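//
// LOCK CMPXCHG compares rax with the memory operand: if they are equal the
// new value is stored and ZF is set, otherwise the current memory value is
// loaded into rax and ZF is cleared. The setcc() helper then materializes
// ZF as the 0/1 result (sete + movzbl, or the APX setzue form, as the format
// strings note). Illustrative shape of the emitted sequence:
//   lock cmpxchgq [mem], newval   # if (rax == [mem]) { [mem] = newval; ZF = 1 }
//   setcc(equal, res)             # res = ZF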
10667 instruct compareAndSwapP(rRegI res,
10668                          memory mem_ptr,
10669                          rax_RegP oldval, rRegP newval,
10670                          rFlagsReg cr)
10671 %{
10672   predicate(n->as_LoadStore()->barrier_data() == 0);
10673   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10674   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10675   effect(KILL cr, KILL oldval);
10676 
10677   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10678             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10679             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10680   ins_encode %{
10681     __ lock();
10682     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10683     __ setcc(Assembler::equal, $res$$Register);
10684   %}
10685   ins_pipe( pipe_cmpxchg );
10686 %}
10687 
10688 instruct compareAndSwapL(rRegI res,
10689                          memory mem_ptr,
10690                          rax_RegL oldval, rRegL newval,
10691                          rFlagsReg cr)
10692 %{
10693   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10694   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10695   effect(KILL cr, KILL oldval);
10696 
10697   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10698             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10699             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10700   ins_encode %{
10701     __ lock();
10702     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10703     __ setcc(Assembler::equal, $res$$Register);
10704   %}
10705   ins_pipe( pipe_cmpxchg );
10706 %}
10707 
10708 instruct compareAndSwapI(rRegI res,
10709                          memory mem_ptr,
10710                          rax_RegI oldval, rRegI newval,
10711                          rFlagsReg cr)
10712 %{
10713   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10714   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10715   effect(KILL cr, KILL oldval);
10716 
10717   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10718             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10719             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10720   ins_encode %{
10721     __ lock();
10722     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10723     __ setcc(Assembler::equal, $res$$Register);
10724   %}
10725   ins_pipe( pipe_cmpxchg );
10726 %}
10727 
10728 instruct compareAndSwapB(rRegI res,
10729                          memory mem_ptr,
10730                          rax_RegI oldval, rRegI newval,
10731                          rFlagsReg cr)
10732 %{
10733   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10734   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10735   effect(KILL cr, KILL oldval);
10736 
10737   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10738             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10739             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10740   ins_encode %{
10741     __ lock();
10742     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10743     __ setcc(Assembler::equal, $res$$Register);
10744   %}
10745   ins_pipe( pipe_cmpxchg );
10746 %}
10747 
10748 instruct compareAndSwapS(rRegI res,
10749                          memory mem_ptr,
10750                          rax_RegI oldval, rRegI newval,
10751                          rFlagsReg cr)
10752 %{
10753   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10754   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10755   effect(KILL cr, KILL oldval);
10756 
10757   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10758             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10759             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10760   ins_encode %{
10761     __ lock();
10762     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10763     __ setcc(Assembler::equal, $res$$Register);
10764   %}
10765   ins_pipe( pipe_cmpxchg );
10766 %}
10767 
10768 instruct compareAndSwapN(rRegI res,
10769                           memory mem_ptr,
10770                           rax_RegN oldval, rRegN newval,
10771                           rFlagsReg cr) %{
10772   predicate(n->as_LoadStore()->barrier_data() == 0);
10773   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10774   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10775   effect(KILL cr, KILL oldval);
10776 
10777   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10778             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10779             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10780   ins_encode %{
10781     __ lock();
10782     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10783     __ setcc(Assembler::equal, $res$$Register);
10784   %}
10785   ins_pipe( pipe_cmpxchg );
10786 %}
10787 
10788 instruct compareAndExchangeB(
10789                          memory mem_ptr,
10790                          rax_RegI oldval, rRegI newval,
10791                          rFlagsReg cr)
10792 %{
10793   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10794   effect(KILL cr);
10795 
10796   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10797             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10798   ins_encode %{
10799     __ lock();
10800     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10801   %}
10802   ins_pipe( pipe_cmpxchg );
10803 %}
10804 
10805 instruct compareAndExchangeS(
10806                          memory mem_ptr,
10807                          rax_RegI oldval, rRegI newval,
10808                          rFlagsReg cr)
10809 %{
10810   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10811   effect(KILL cr);
10812 
10813   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10814             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10815   ins_encode %{
10816     __ lock();
10817     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10818   %}
10819   ins_pipe( pipe_cmpxchg );
10820 %}
10821 
10822 instruct compareAndExchangeI(
10823                          memory mem_ptr,
10824                          rax_RegI oldval, rRegI newval,
10825                          rFlagsReg cr)
10826 %{
10827   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10828   effect(KILL cr);
10829 
10830   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10831             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10832   ins_encode %{
10833     __ lock();
10834     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10835   %}
10836   ins_pipe( pipe_cmpxchg );
10837 %}
10838 
10839 instruct compareAndExchangeL(
10840                          memory mem_ptr,
10841                          rax_RegL oldval, rRegL newval,
10842                          rFlagsReg cr)
10843 %{
10844   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10845   effect(KILL cr);
10846 
10847   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10848             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10849   ins_encode %{
10850     __ lock();
10851     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10852   %}
10853   ins_pipe( pipe_cmpxchg );
10854 %}
10855 
10856 instruct compareAndExchangeN(
10857                           memory mem_ptr,
10858                           rax_RegN oldval, rRegN newval,
10859                           rFlagsReg cr) %{
10860   predicate(n->as_LoadStore()->barrier_data() == 0);
10861   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10862   effect(KILL cr);
10863 
10864   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10865             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10866   ins_encode %{
10867     __ lock();
10868     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10869   %}
10870   ins_pipe( pipe_cmpxchg );
10871 %}
10872 
10873 instruct compareAndExchangeP(
10874                          memory mem_ptr,
10875                          rax_RegP oldval, rRegP newval,
10876                          rFlagsReg cr)
10877 %{
10878   predicate(n->as_LoadStore()->barrier_data() == 0);
10879   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10880   effect(KILL cr);
10881 
10882   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10883             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10884   ins_encode %{
10885     __ lock();
10886     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10887   %}
10888   ins_pipe( pipe_cmpxchg );
10889 %}
10890 
10891 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10892   predicate(n->as_LoadStore()->result_not_used());
10893   match(Set dummy (GetAndAddB mem add));
10894   effect(KILL cr);
10895   format %{ "addb_lock   $mem, $add" %}
10896   ins_encode %{
10897     __ lock();
10898     __ addb($mem$$Address, $add$$Register);
10899   %}
10900   ins_pipe(pipe_cmpxchg);
10901 %}
10902 
10903 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10904   predicate(n->as_LoadStore()->result_not_used());
10905   match(Set dummy (GetAndAddB mem add));
10906   effect(KILL cr);
10907   format %{ "addb_lock   $mem, $add" %}
10908   ins_encode %{
10909     __ lock();
10910     __ addb($mem$$Address, $add$$constant);
10911   %}
10912   ins_pipe(pipe_cmpxchg);
10913 %}
10914 
10915 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10916   predicate(!n->as_LoadStore()->result_not_used());
10917   match(Set newval (GetAndAddB mem newval));
10918   effect(KILL cr);
10919   format %{ "xaddb_lock  $mem, $newval" %}
10920   ins_encode %{
10921     __ lock();
10922     __ xaddb($mem$$Address, $newval$$Register);
10923   %}
10924   ins_pipe(pipe_cmpxchg);
10925 %}
10926 
10927 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10928   predicate(n->as_LoadStore()->result_not_used());
10929   match(Set dummy (GetAndAddS mem add));
10930   effect(KILL cr);
10931   format %{ "addw_lock   $mem, $add" %}
10932   ins_encode %{
10933     __ lock();
10934     __ addw($mem$$Address, $add$$Register);
10935   %}
10936   ins_pipe(pipe_cmpxchg);
10937 %}
10938 
10939 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10940   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10941   match(Set dummy (GetAndAddS mem add));
10942   effect(KILL cr);
10943   format %{ "addw_lock   $mem, $add" %}
10944   ins_encode %{
10945     __ lock();
10946     __ addw($mem$$Address, $add$$constant);
10947   %}
10948   ins_pipe(pipe_cmpxchg);
10949 %}
10950 
10951 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10952   predicate(!n->as_LoadStore()->result_not_used());
10953   match(Set newval (GetAndAddS mem newval));
10954   effect(KILL cr);
10955   format %{ "xaddw_lock  $mem, $newval" %}
10956   ins_encode %{
10957     __ lock();
10958     __ xaddw($mem$$Address, $newval$$Register);
10959   %}
10960   ins_pipe(pipe_cmpxchg);
10961 %}
10962 
10963 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10964   predicate(n->as_LoadStore()->result_not_used());
10965   match(Set dummy (GetAndAddI mem add));
10966   effect(KILL cr);
10967   format %{ "addl_lock   $mem, $add" %}
10968   ins_encode %{
10969     __ lock();
10970     __ addl($mem$$Address, $add$$Register);
10971   %}
10972   ins_pipe(pipe_cmpxchg);
10973 %}
10974 
10975 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10976   predicate(n->as_LoadStore()->result_not_used());
10977   match(Set dummy (GetAndAddI mem add));
10978   effect(KILL cr);
10979   format %{ "addl_lock   $mem, $add" %}
10980   ins_encode %{
10981     __ lock();
10982     __ addl($mem$$Address, $add$$constant);
10983   %}
10984   ins_pipe(pipe_cmpxchg);
10985 %}
10986 
10987 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10988   predicate(!n->as_LoadStore()->result_not_used());
10989   match(Set newval (GetAndAddI mem newval));
10990   effect(KILL cr);
10991   format %{ "xaddl_lock  $mem, $newval" %}
10992   ins_encode %{
10993     __ lock();
10994     __ xaddl($mem$$Address, $newval$$Register);
10995   %}
10996   ins_pipe(pipe_cmpxchg);
10997 %}
10998 
10999 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11000   predicate(n->as_LoadStore()->result_not_used());
11001   match(Set dummy (GetAndAddL mem add));
11002   effect(KILL cr);
11003   format %{ "addq_lock   $mem, $add" %}
11004   ins_encode %{
11005     __ lock();
11006     __ addq($mem$$Address, $add$$Register);
11007   %}
11008   ins_pipe(pipe_cmpxchg);
11009 %}
11010 
11011 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11012   predicate(n->as_LoadStore()->result_not_used());
11013   match(Set dummy (GetAndAddL mem add));
11014   effect(KILL cr);
11015   format %{ "addq_lock   $mem, $add" %}
11016   ins_encode %{
11017     __ lock();
11018     __ addq($mem$$Address, $add$$constant);
11019   %}
11020   ins_pipe(pipe_cmpxchg);
11021 %}
11022 
11023 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11024   predicate(!n->as_LoadStore()->result_not_used());
11025   match(Set newval (GetAndAddL mem newval));
11026   effect(KILL cr);
11027   format %{ "xaddq_lock  $mem, $newval" %}
11028   ins_encode %{
11029     __ lock();
11030     __ xaddq($mem$$Address, $newval$$Register);
11031   %}
11032   ins_pipe(pipe_cmpxchg);
11033 %}
11034 
11035 instruct xchgB( memory mem, rRegI newval) %{
11036   match(Set newval (GetAndSetB mem newval));
11037   format %{ "XCHGB  $newval,[$mem]" %}
11038   ins_encode %{
11039     __ xchgb($newval$$Register, $mem$$Address);
11040   %}
11041   ins_pipe( pipe_cmpxchg );
11042 %}
11043 
11044 instruct xchgS( memory mem, rRegI newval) %{
11045   match(Set newval (GetAndSetS mem newval));
11046   format %{ "XCHGW  $newval,[$mem]" %}
11047   ins_encode %{
11048     __ xchgw($newval$$Register, $mem$$Address);
11049   %}
11050   ins_pipe( pipe_cmpxchg );
11051 %}
11052 
11053 instruct xchgI( memory mem, rRegI newval) %{
11054   match(Set newval (GetAndSetI mem newval));
11055   format %{ "XCHGL  $newval,[$mem]" %}
11056   ins_encode %{
11057     __ xchgl($newval$$Register, $mem$$Address);
11058   %}
11059   ins_pipe( pipe_cmpxchg );
11060 %}
11061 
11062 instruct xchgL( memory mem, rRegL newval) %{
11063   match(Set newval (GetAndSetL mem newval));
11064   format %{ "XCHGL  $newval,[$mem]" %}
11065   ins_encode %{
11066     __ xchgq($newval$$Register, $mem$$Address);
11067   %}
11068   ins_pipe( pipe_cmpxchg );
11069 %}
11070 
11071 instruct xchgP( memory mem, rRegP newval) %{
11072   match(Set newval (GetAndSetP mem newval));
11073   predicate(n->as_LoadStore()->barrier_data() == 0);
11074   format %{ "XCHGQ  $newval,[$mem]" %}
11075   ins_encode %{
11076     __ xchgq($newval$$Register, $mem$$Address);
11077   %}
11078   ins_pipe( pipe_cmpxchg );
11079 %}
11080 
11081 instruct xchgN( memory mem, rRegN newval) %{
11082   predicate(n->as_LoadStore()->barrier_data() == 0);
11083   match(Set newval (GetAndSetN mem newval));
11084   format %{ "XCHGL  $newval,$mem]" %}
11085   ins_encode %{
11086     __ xchgl($newval$$Register, $mem$$Address);
11087   %}
11088   ins_pipe( pipe_cmpxchg );
11089 %}
11090 
11091 //----------Abs Instructions-------------------------------------------
11092 
11093 // Integer Absolute Instructions
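// Branchless absolute value: zero the destination, subtract the source
// (dst = -src, with flags set as for the signed compare 0 - src), then
// conditionally replace the result with the original source when src > 0:
//   xorl    dst, dst         # dst = 0
//   subl    dst, src         # dst = -src; flags from (0 - src)
//   cmovll  dst, src         # if (0 < src) dst = src
// (absL uses xorl for the zeroing as well, since a 32-bit xor zero-extends
// to the full 64-bit register.)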
11094 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11095 %{
11096   match(Set dst (AbsI src));
11097   effect(TEMP dst, KILL cr);
11098   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11099             "subl    $dst, $src\n\t"
11100             "cmovll  $dst, $src" %}
11101   ins_encode %{
11102     __ xorl($dst$$Register, $dst$$Register);
11103     __ subl($dst$$Register, $src$$Register);
11104     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11105   %}
11106 
11107   ins_pipe(ialu_reg_reg);
11108 %}
11109 
11110 // Long Absolute Instructions
11111 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11112 %{
11113   match(Set dst (AbsL src));
11114   effect(TEMP dst, KILL cr);
11115   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11116             "subq    $dst, $src\n\t"
11117             "cmovlq  $dst, $src" %}
11118   ins_encode %{
11119     __ xorl($dst$$Register, $dst$$Register);
11120     __ subq($dst$$Register, $src$$Register);
11121     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11122   %}
11123 
11124   ins_pipe(ialu_reg_reg);
11125 %}
11126 
11127 //----------Subtraction Instructions-------------------------------------------
11128 
11129 // Integer Subtraction Instructions
11130 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11131 %{
11132   predicate(!UseAPX);
11133   match(Set dst (SubI dst src));
11134   effect(KILL cr);
11135   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11136 
11137   format %{ "subl    $dst, $src\t# int" %}
11138   ins_encode %{
11139     __ subl($dst$$Register, $src$$Register);
11140   %}
11141   ins_pipe(ialu_reg_reg);
11142 %}
11143 
11144 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11145 %{
11146   predicate(UseAPX);
11147   match(Set dst (SubI src1 src2));
11148   effect(KILL cr);
11149   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11150 
11151   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11152   ins_encode %{
11153     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11154   %}
11155   ins_pipe(ialu_reg_reg);
11156 %}
11157 
11158 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11159 %{
11160   predicate(UseAPX);
11161   match(Set dst (SubI src1 src2));
11162   effect(KILL cr);
11163   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11164 
11165   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11166   ins_encode %{
11167     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11168   %}
11169   ins_pipe(ialu_reg_reg);
11170 %}
11171 
11172 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11173 %{
11174   predicate(UseAPX);
11175   match(Set dst (SubI (LoadI src1) src2));
11176   effect(KILL cr);
11177   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11178 
11179   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11180   ins_encode %{
11181     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11182   %}
11183   ins_pipe(ialu_reg_reg);
11184 %}
11185 
11186 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11187 %{
11188   predicate(!UseAPX);
11189   match(Set dst (SubI dst (LoadI src)));
11190   effect(KILL cr);
11191   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11192 
11193   ins_cost(150);
11194   format %{ "subl    $dst, $src\t# int" %}
11195   ins_encode %{
11196     __ subl($dst$$Register, $src$$Address);
11197   %}
11198   ins_pipe(ialu_reg_mem);
11199 %}
11200 
11201 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11202 %{
11203   predicate(UseAPX);
11204   match(Set dst (SubI src1 (LoadI src2)));
11205   effect(KILL cr);
11206   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11207 
11208   ins_cost(150);
11209   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11210   ins_encode %{
11211     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11212   %}
11213   ins_pipe(ialu_reg_mem);
11214 %}
11215 
11216 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11217 %{
11218   predicate(UseAPX);
11219   match(Set dst (SubI (LoadI src1) src2));
11220   effect(KILL cr);
11221   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11222 
11223   ins_cost(150);
11224   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11225   ins_encode %{
11226     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11227   %}
11228   ins_pipe(ialu_reg_mem);
11229 %}
11230 
11231 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11232 %{
11233   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11234   effect(KILL cr);
11235   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11236 
11237   ins_cost(150);
11238   format %{ "subl    $dst, $src\t# int" %}
11239   ins_encode %{
11240     __ subl($dst$$Address, $src$$Register);
11241   %}
11242   ins_pipe(ialu_mem_reg);
11243 %}
11244 
11245 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11246 %{
11247   predicate(!UseAPX);
11248   match(Set dst (SubL dst src));
11249   effect(KILL cr);
11250   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11251 
11252   format %{ "subq    $dst, $src\t# long" %}
11253   ins_encode %{
11254     __ subq($dst$$Register, $src$$Register);
11255   %}
11256   ins_pipe(ialu_reg_reg);
11257 %}
11258 
11259 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11260 %{
11261   predicate(UseAPX);
11262   match(Set dst (SubL src1 src2));
11263   effect(KILL cr);
11264   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11265 
11266   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11267   ins_encode %{
11268     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11269   %}
11270   ins_pipe(ialu_reg_reg);
11271 %}
11272 
11273 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11274 %{
11275   predicate(UseAPX);
11276   match(Set dst (SubL src1 src2));
11277   effect(KILL cr);
11278   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11279 
11280   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11281   ins_encode %{
11282     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11283   %}
11284   ins_pipe(ialu_reg_reg);
11285 %}
11286 
11287 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11288 %{
11289   predicate(UseAPX);
11290   match(Set dst (SubL (LoadL src1) src2));
11291   effect(KILL cr);
11292   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11293 
11294   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11295   ins_encode %{
11296     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11297   %}
11298   ins_pipe(ialu_reg_reg);
11299 %}
11300 
11301 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11302 %{
11303   predicate(!UseAPX);
11304   match(Set dst (SubL dst (LoadL src)));
11305   effect(KILL cr);
11306   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11307 
11308   ins_cost(150);
11309   format %{ "subq    $dst, $src\t# long" %}
11310   ins_encode %{
11311     __ subq($dst$$Register, $src$$Address);
11312   %}
11313   ins_pipe(ialu_reg_mem);
11314 %}
11315 
11316 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11317 %{
11318   predicate(UseAPX);
11319   match(Set dst (SubL src1 (LoadL src2)));
11320   effect(KILL cr);
11321   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11322 
11323   ins_cost(150);
11324   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11325   ins_encode %{
11326     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11327   %}
11328   ins_pipe(ialu_reg_mem);
11329 %}
11330 
11331 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11332 %{
11333   predicate(UseAPX);
11334   match(Set dst (SubL (LoadL src1) src2));
11335   effect(KILL cr);
11336   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11337 
11338   ins_cost(150);
11339   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11340   ins_encode %{
11341     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11342   %}
11343   ins_pipe(ialu_reg_mem);
11344 %}
11345 
11346 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11347 %{
11348   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11349   effect(KILL cr);
11350   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11351 
11352   ins_cost(150);
11353   format %{ "subq    $dst, $src\t# long" %}
11354   ins_encode %{
11355     __ subq($dst$$Address, $src$$Register);
11356   %}
11357   ins_pipe(ialu_mem_reg);
11358 %}
11359 
// Subtract an int from a pointer
// Matched as AddP with a negated index: "ptr - x" reaches the matcher as
// AddP ptr (SubI 0 x), hence the subq in the encoding.
11362 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11363 %{
11364   match(Set dst (AddP dst (SubI zero src)));
11365   effect(KILL cr);
11366 
11367   format %{ "subq    $dst, $src\t# ptr - int" %}
11368   ins_encode %{
11369     __ subq($dst$$Register, $src$$Register);
11370   %}
11371   ins_pipe(ialu_reg_reg);
11372 %}
11373 
11374 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11375 %{
11376   predicate(!UseAPX);
11377   match(Set dst (SubI zero dst));
11378   effect(KILL cr);
11379   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11380 
11381   format %{ "negl    $dst\t# int" %}
11382   ins_encode %{
11383     __ negl($dst$$Register);
11384   %}
11385   ins_pipe(ialu_reg);
11386 %}
11387 
11388 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11389 %{
11390   predicate(UseAPX);
11391   match(Set dst (SubI zero src));
11392   effect(KILL cr);
11393   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11394 
11395   format %{ "enegl    $dst, $src\t# int ndd" %}
11396   ins_encode %{
11397     __ enegl($dst$$Register, $src$$Register, false);
11398   %}
11399   ins_pipe(ialu_reg);
11400 %}
11401 
11402 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11403 %{
11404   predicate(!UseAPX);
11405   match(Set dst (NegI dst));
11406   effect(KILL cr);
11407   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11408 
11409   format %{ "negl    $dst\t# int" %}
11410   ins_encode %{
11411     __ negl($dst$$Register);
11412   %}
11413   ins_pipe(ialu_reg);
11414 %}
11415 
11416 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11417 %{
11418   predicate(UseAPX);
11419   match(Set dst (NegI src));
11420   effect(KILL cr);
11421   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11422 
11423   format %{ "enegl    $dst, $src\t# int ndd" %}
11424   ins_encode %{
11425     __ enegl($dst$$Register, $src$$Register, false);
11426   %}
11427   ins_pipe(ialu_reg);
11428 %}
11429 
11430 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11431 %{
11432   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11433   effect(KILL cr);
11434   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11435 
11436   format %{ "negl    $dst\t# int" %}
11437   ins_encode %{
11438     __ negl($dst$$Address);
11439   %}
11440   ins_pipe(ialu_reg);
11441 %}
11442 
11443 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11444 %{
11445   predicate(!UseAPX);
11446   match(Set dst (SubL zero dst));
11447   effect(KILL cr);
11448   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11449 
11450   format %{ "negq    $dst\t# long" %}
11451   ins_encode %{
11452     __ negq($dst$$Register);
11453   %}
11454   ins_pipe(ialu_reg);
11455 %}
11456 
11457 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11458 %{
11459   predicate(UseAPX);
11460   match(Set dst (SubL zero src));
11461   effect(KILL cr);
11462   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11463 
11464   format %{ "enegq    $dst, $src\t# long ndd" %}
11465   ins_encode %{
11466     __ enegq($dst$$Register, $src$$Register, false);
11467   %}
11468   ins_pipe(ialu_reg);
11469 %}
11470 
11471 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11472 %{
11473   predicate(!UseAPX);
11474   match(Set dst (NegL dst));
11475   effect(KILL cr);
11476   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11477 
11478   format %{ "negq    $dst\t# int" %}
11479   ins_encode %{
11480     __ negq($dst$$Register);
11481   %}
11482   ins_pipe(ialu_reg);
11483 %}
11484 
11485 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11486 %{
11487   predicate(UseAPX);
11488   match(Set dst (NegL src));
11489   effect(KILL cr);
11490   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11491 
11492   format %{ "enegq    $dst, $src\t# long ndd" %}
11493   ins_encode %{
11494     __ enegq($dst$$Register, $src$$Register, false);
11495   %}
11496   ins_pipe(ialu_reg);
11497 %}
11498 
11499 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11500 %{
11501   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11502   effect(KILL cr);
11503   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11504 
11505   format %{ "negq    $dst\t# long" %}
11506   ins_encode %{
11507     __ negq($dst$$Address);
11508   %}
11509   ins_pipe(ialu_reg);
11510 %}
11511 
11512 //----------Multiplication/Division Instructions-------------------------------
11513 // Integer Multiplication Instructions
11514 // Multiply Register
11515 
11516 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11517 %{
11518   predicate(!UseAPX);
11519   match(Set dst (MulI dst src));
11520   effect(KILL cr);
11521 
11522   ins_cost(300);
11523   format %{ "imull   $dst, $src\t# int" %}
11524   ins_encode %{
11525     __ imull($dst$$Register, $src$$Register);
11526   %}
11527   ins_pipe(ialu_reg_reg_alu0);
11528 %}
11529 
11530 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11531 %{
11532   predicate(UseAPX);
11533   match(Set dst (MulI src1 src2));
11534   effect(KILL cr);
11535   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11536 
11537   ins_cost(300);
11538   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11539   ins_encode %{
11540     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11541   %}
11542   ins_pipe(ialu_reg_reg_alu0);
11543 %}
11544 
11545 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11546 %{
11547   match(Set dst (MulI src imm));
11548   effect(KILL cr);
11549 
11550   ins_cost(300);
11551   format %{ "imull   $dst, $src, $imm\t# int" %}
11552   ins_encode %{
11553     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11554   %}
11555   ins_pipe(ialu_reg_reg_alu0);
11556 %}
11557 
11558 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11559 %{
11560   predicate(!UseAPX);
11561   match(Set dst (MulI dst (LoadI src)));
11562   effect(KILL cr);
11563 
11564   ins_cost(350);
11565   format %{ "imull   $dst, $src\t# int" %}
11566   ins_encode %{
11567     __ imull($dst$$Register, $src$$Address);
11568   %}
11569   ins_pipe(ialu_reg_mem_alu0);
11570 %}
11571 
11572 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11573 %{
11574   predicate(UseAPX);
11575   match(Set dst (MulI src1 (LoadI src2)));
11576   effect(KILL cr);
11577   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11578 
11579   ins_cost(350);
11580   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11581   ins_encode %{
11582     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11583   %}
11584   ins_pipe(ialu_reg_mem_alu0);
11585 %}
11586 
11587 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11588 %{
11589   match(Set dst (MulI (LoadI src) imm));
11590   effect(KILL cr);
11591 
11592   ins_cost(300);
11593   format %{ "imull   $dst, $src, $imm\t# int" %}
11594   ins_encode %{
11595     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11596   %}
11597   ins_pipe(ialu_reg_mem_alu0);
11598 %}
11599 
11600 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11601 %{
11602   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11603   effect(KILL cr, KILL src2);
11604 
  expand %{ mulI_rReg(dst, src1, cr);
            mulI_rReg(src2, src3, cr);
            addI_rReg(dst, src2, cr); %}
11608 %}
11609 
11610 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11611 %{
11612   predicate(!UseAPX);
11613   match(Set dst (MulL dst src));
11614   effect(KILL cr);
11615 
11616   ins_cost(300);
11617   format %{ "imulq   $dst, $src\t# long" %}
11618   ins_encode %{
11619     __ imulq($dst$$Register, $src$$Register);
11620   %}
11621   ins_pipe(ialu_reg_reg_alu0);
11622 %}
11623 
11624 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11625 %{
11626   predicate(UseAPX);
11627   match(Set dst (MulL src1 src2));
11628   effect(KILL cr);
11629   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11630 
11631   ins_cost(300);
11632   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11633   ins_encode %{
11634     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11635   %}
11636   ins_pipe(ialu_reg_reg_alu0);
11637 %}
11638 
11639 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11640 %{
11641   match(Set dst (MulL src imm));
11642   effect(KILL cr);
11643 
11644   ins_cost(300);
11645   format %{ "imulq   $dst, $src, $imm\t# long" %}
11646   ins_encode %{
11647     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11648   %}
11649   ins_pipe(ialu_reg_reg_alu0);
11650 %}
11651 
11652 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11653 %{
11654   predicate(!UseAPX);
11655   match(Set dst (MulL dst (LoadL src)));
11656   effect(KILL cr);
11657 
11658   ins_cost(350);
11659   format %{ "imulq   $dst, $src\t# long" %}
11660   ins_encode %{
11661     __ imulq($dst$$Register, $src$$Address);
11662   %}
11663   ins_pipe(ialu_reg_mem_alu0);
11664 %}
11665 
11666 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11667 %{
11668   predicate(UseAPX);
11669   match(Set dst (MulL src1 (LoadL src2)));
11670   effect(KILL cr);
11671   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11672 
11673   ins_cost(350);
11674   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11675   ins_encode %{
11676     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11677   %}
11678   ins_pipe(ialu_reg_mem_alu0);
11679 %}
11680 
11681 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11682 %{
11683   match(Set dst (MulL (LoadL src) imm));
11684   effect(KILL cr);
11685 
11686   ins_cost(300);
11687   format %{ "imulq   $dst, $src, $imm\t# long" %}
11688   ins_encode %{
11689     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11690   %}
11691   ins_pipe(ialu_reg_mem_alu0);
11692 %}
11693 
11694 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11695 %{
11696   match(Set dst (MulHiL src rax));
11697   effect(USE_KILL rax, KILL cr);
11698 
11699   ins_cost(300);
11700   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11701   ins_encode %{
11702     __ imulq($src$$Register);
11703   %}
11704   ins_pipe(ialu_reg_reg_alu0);
11705 %}
11706 
11707 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11708 %{
11709   match(Set dst (UMulHiL src rax));
11710   effect(USE_KILL rax, KILL cr);
11711 
11712   ins_cost(300);
11713   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11714   ins_encode %{
11715     __ mulq($src$$Register);
11716   %}
11717   ins_pipe(ialu_reg_reg_alu0);
11718 %}
11719 
11720 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11721                    rFlagsReg cr)
11722 %{
11723   match(Set rax (DivI rax div));
11724   effect(KILL rdx, KILL cr);
11725 
11726   ins_cost(30*100+10*100); // XXX
11727   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11728             "jne,s   normal\n\t"
11729             "xorl    rdx, rdx\n\t"
11730             "cmpl    $div, -1\n\t"
11731             "je,s    done\n"
11732     "normal: cdql\n\t"
11733             "idivl   $div\n"
11734     "done:"        %}
11735   ins_encode(cdql_enc(div));
11736   ins_pipe(ialu_reg_reg_alu0);
11737 %}
11738 
11739 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11740                    rFlagsReg cr)
11741 %{
11742   match(Set rax (DivL rax div));
11743   effect(KILL rdx, KILL cr);
11744 
11745   ins_cost(30*100+10*100); // XXX
11746   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11747             "cmpq    rax, rdx\n\t"
11748             "jne,s   normal\n\t"
11749             "xorl    rdx, rdx\n\t"
11750             "cmpq    $div, -1\n\t"
11751             "je,s    done\n"
11752     "normal: cdqq\n\t"
11753             "idivq   $div\n"
11754     "done:"        %}
11755   ins_encode(cdqq_enc(div));
11756   ins_pipe(ialu_reg_reg_alu0);
11757 %}
11758 
11759 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11760 %{
11761   match(Set rax (UDivI rax div));
11762   effect(KILL rdx, KILL cr);
11763 
11764   ins_cost(300);
11765   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11766   ins_encode %{
11767     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11768   %}
11769   ins_pipe(ialu_reg_reg_alu0);
11770 %}
11771 
11772 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11773 %{
11774   match(Set rax (UDivL rax div));
11775   effect(KILL rdx, KILL cr);
11776 
11777   ins_cost(300);
11778   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11779   ins_encode %{
11780      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11781   %}
11782   ins_pipe(ialu_reg_reg_alu0);
11783 %}
11784 
11785 // Integer DIVMOD with Register, both quotient and mod results
11786 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11787                              rFlagsReg cr)
11788 %{
11789   match(DivModI rax div);
11790   effect(KILL cr);
11791 
11792   ins_cost(30*100+10*100); // XXX
11793   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11794             "jne,s   normal\n\t"
11795             "xorl    rdx, rdx\n\t"
11796             "cmpl    $div, -1\n\t"
11797             "je,s    done\n"
11798     "normal: cdql\n\t"
11799             "idivl   $div\n"
11800     "done:"        %}
11801   ins_encode(cdql_enc(div));
11802   ins_pipe(pipe_slow);
11803 %}
11804 
11805 // Long DIVMOD with Register, both quotient and mod results
11806 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11807                              rFlagsReg cr)
11808 %{
11809   match(DivModL rax div);
11810   effect(KILL cr);
11811 
11812   ins_cost(30*100+10*100); // XXX
11813   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11814             "cmpq    rax, rdx\n\t"
11815             "jne,s   normal\n\t"
11816             "xorl    rdx, rdx\n\t"
11817             "cmpq    $div, -1\n\t"
11818             "je,s    done\n"
11819     "normal: cdqq\n\t"
11820             "idivq   $div\n"
11821     "done:"        %}
11822   ins_encode(cdqq_enc(div));
11823   ins_pipe(pipe_slow);
11824 %}
11825 
11826 // Unsigned integer DIVMOD with Register, both quotient and mod results
11827 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11828                               no_rax_rdx_RegI div, rFlagsReg cr)
11829 %{
11830   match(UDivModI rax div);
11831   effect(TEMP tmp, KILL cr);
11832 
11833   ins_cost(300);
11834   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11835             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11836           %}
11837   ins_encode %{
11838     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11839   %}
11840   ins_pipe(pipe_slow);
11841 %}
11842 
11843 // Unsigned long DIVMOD with Register, both quotient and mod results
11844 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11845                               no_rax_rdx_RegL div, rFlagsReg cr)
11846 %{
11847   match(UDivModL rax div);
11848   effect(TEMP tmp, KILL cr);
11849 
11850   ins_cost(300);
11851   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11852             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11853           %}
11854   ins_encode %{
11855     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11856   %}
11857   ins_pipe(pipe_slow);
11858 %}
11859 
11860 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11861                    rFlagsReg cr)
11862 %{
11863   match(Set rdx (ModI rax div));
11864   effect(KILL rax, KILL cr);
11865 
11866   ins_cost(300); // XXX
11867   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11868             "jne,s   normal\n\t"
11869             "xorl    rdx, rdx\n\t"
11870             "cmpl    $div, -1\n\t"
11871             "je,s    done\n"
11872     "normal: cdql\n\t"
11873             "idivl   $div\n"
11874     "done:"        %}
11875   ins_encode(cdql_enc(div));
11876   ins_pipe(ialu_reg_reg_alu0);
11877 %}
11878 
11879 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11880                    rFlagsReg cr)
11881 %{
11882   match(Set rdx (ModL rax div));
11883   effect(KILL rax, KILL cr);
11884 
11885   ins_cost(300); // XXX
11886   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11887             "cmpq    rax, rdx\n\t"
11888             "jne,s   normal\n\t"
11889             "xorl    rdx, rdx\n\t"
11890             "cmpq    $div, -1\n\t"
11891             "je,s    done\n"
11892     "normal: cdqq\n\t"
11893             "idivq   $div\n"
11894     "done:"        %}
11895   ins_encode(cdqq_enc(div));
11896   ins_pipe(ialu_reg_reg_alu0);
11897 %}
11898 
11899 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11900 %{
11901   match(Set rdx (UModI rax div));
11902   effect(KILL rax, KILL cr);
11903 
11904   ins_cost(300);
11905   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11906   ins_encode %{
11907     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11908   %}
11909   ins_pipe(ialu_reg_reg_alu0);
11910 %}
11911 
11912 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11913 %{
11914   match(Set rdx (UModL rax div));
11915   effect(KILL rax, KILL cr);
11916 
11917   ins_cost(300);
11918   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11919   ins_encode %{
11920     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11921   %}
11922   ins_pipe(ialu_reg_reg_alu0);
11923 %}
11924 
11925 // Integer Shift Instructions
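//
// Each shift comes in two flavors: the classic sal/sar/shr forms (in-place,
// count in an immediate or in CL, flags clobbered, hence KILL cr) and the
// non-destructive forms (BMI2 shlx/sarx/shrx, APX esal/esar/eshr). The BMI2
// variants do not write the flags at all, which is why those patterns carry
// no rFlagsReg effect:
//   shlxl dst, src, cnt      # dst = src << (cnt & 31), flags untouched
//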
11926 // Shift Left by one, two, three
11927 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11928 %{
11929   predicate(!UseAPX);
11930   match(Set dst (LShiftI dst shift));
11931   effect(KILL cr);
11932 
11933   format %{ "sall    $dst, $shift" %}
11934   ins_encode %{
11935     __ sall($dst$$Register, $shift$$constant);
11936   %}
11937   ins_pipe(ialu_reg);
11938 %}
11939 
11940 // Shift Left by one, two, three
11941 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11942 %{
11943   predicate(UseAPX);
11944   match(Set dst (LShiftI src shift));
11945   effect(KILL cr);
11946   flag(PD::Flag_ndd_demotable_opr1);
11947 
11948   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11949   ins_encode %{
11950     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11951   %}
11952   ins_pipe(ialu_reg);
11953 %}
11954 
11955 // Shift Left by 8-bit immediate
11956 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11957 %{
11958   predicate(!UseAPX);
11959   match(Set dst (LShiftI dst shift));
11960   effect(KILL cr);
11961 
11962   format %{ "sall    $dst, $shift" %}
11963   ins_encode %{
11964     __ sall($dst$$Register, $shift$$constant);
11965   %}
11966   ins_pipe(ialu_reg);
11967 %}
11968 
11969 // Shift Left by 8-bit immediate
11970 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11971 %{
11972   predicate(UseAPX);
11973   match(Set dst (LShiftI src shift));
11974   effect(KILL cr);
11975   flag(PD::Flag_ndd_demotable_opr1);
11976 
11977   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11978   ins_encode %{
11979     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11980   %}
11981   ins_pipe(ialu_reg);
11982 %}
11983 
11984 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11985 %{
11986   predicate(UseAPX);
11987   match(Set dst (LShiftI (LoadI src) shift));
11988   effect(KILL cr);
11989 
11990   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11991   ins_encode %{
11992     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11993   %}
11994   ins_pipe(ialu_reg);
11995 %}
11996 
11997 // Shift Left by 8-bit immediate
11998 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11999 %{
12000   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12001   effect(KILL cr);
12002 
12003   format %{ "sall    $dst, $shift" %}
12004   ins_encode %{
12005     __ sall($dst$$Address, $shift$$constant);
12006   %}
12007   ins_pipe(ialu_mem_imm);
12008 %}
12009 
12010 // Shift Left by variable
12011 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12012 %{
12013   predicate(!VM_Version::supports_bmi2());
12014   match(Set dst (LShiftI dst shift));
12015   effect(KILL cr);
12016 
12017   format %{ "sall    $dst, $shift" %}
12018   ins_encode %{
12019     __ sall($dst$$Register);
12020   %}
12021   ins_pipe(ialu_reg_reg);
12022 %}
12023 
12024 // Shift Left by variable
12025 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12026 %{
12027   predicate(!VM_Version::supports_bmi2());
12028   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12029   effect(KILL cr);
12030 
12031   format %{ "sall    $dst, $shift" %}
12032   ins_encode %{
12033     __ sall($dst$$Address);
12034   %}
12035   ins_pipe(ialu_mem_reg);
12036 %}
12037 
12038 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12039 %{
12040   predicate(VM_Version::supports_bmi2());
12041   match(Set dst (LShiftI src shift));
12042 
12043   format %{ "shlxl   $dst, $src, $shift" %}
12044   ins_encode %{
12045     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12046   %}
12047   ins_pipe(ialu_reg_reg);
12048 %}
12049 
12050 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12051 %{
12052   predicate(VM_Version::supports_bmi2());
12053   match(Set dst (LShiftI (LoadI src) shift));
12054   ins_cost(175);
12055   format %{ "shlxl   $dst, $src, $shift" %}
12056   ins_encode %{
12057     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12058   %}
12059   ins_pipe(ialu_reg_mem);
12060 %}
12061 
12062 // Arithmetic Shift Right by 8-bit immediate
12063 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12064 %{
12065   predicate(!UseAPX);
12066   match(Set dst (RShiftI dst shift));
12067   effect(KILL cr);
12068 
12069   format %{ "sarl    $dst, $shift" %}
12070   ins_encode %{
12071     __ sarl($dst$$Register, $shift$$constant);
12072   %}
12073   ins_pipe(ialu_mem_imm);
12074 %}
12075 
12076 // Arithmetic Shift Right by 8-bit immediate
12077 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12078 %{
12079   predicate(UseAPX);
12080   match(Set dst (RShiftI src shift));
12081   effect(KILL cr);
12082   flag(PD::Flag_ndd_demotable_opr1);
12083 
12084   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12085   ins_encode %{
12086     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12087   %}
12088   ins_pipe(ialu_mem_imm);
12089 %}
12090 
12091 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12092 %{
12093   predicate(UseAPX);
12094   match(Set dst (RShiftI (LoadI src) shift));
12095   effect(KILL cr);
12096 
12097   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12098   ins_encode %{
12099     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12100   %}
12101   ins_pipe(ialu_mem_imm);
12102 %}
12103 
12104 // Arithmetic Shift Right by 8-bit immediate
12105 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12106 %{
12107   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12108   effect(KILL cr);
12109 
12110   format %{ "sarl    $dst, $shift" %}
12111   ins_encode %{
12112     __ sarl($dst$$Address, $shift$$constant);
12113   %}
12114   ins_pipe(ialu_mem_imm);
12115 %}
12116 
12117 // Arithmetic Shift Right by variable
12118 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12119 %{
12120   predicate(!VM_Version::supports_bmi2());
12121   match(Set dst (RShiftI dst shift));
12122   effect(KILL cr);
12123 
12124   format %{ "sarl    $dst, $shift" %}
12125   ins_encode %{
12126     __ sarl($dst$$Register);
12127   %}
12128   ins_pipe(ialu_reg_reg);
12129 %}
12130 
12131 // Arithmetic Shift Right by variable
12132 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12133 %{
12134   predicate(!VM_Version::supports_bmi2());
12135   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12136   effect(KILL cr);
12137 
12138   format %{ "sarl    $dst, $shift" %}
12139   ins_encode %{
12140     __ sarl($dst$$Address);
12141   %}
12142   ins_pipe(ialu_mem_reg);
12143 %}
12144 
12145 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12146 %{
12147   predicate(VM_Version::supports_bmi2());
12148   match(Set dst (RShiftI src shift));
12149 
12150   format %{ "sarxl   $dst, $src, $shift" %}
12151   ins_encode %{
12152     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12153   %}
12154   ins_pipe(ialu_reg_reg);
12155 %}
12156 
12157 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12158 %{
12159   predicate(VM_Version::supports_bmi2());
12160   match(Set dst (RShiftI (LoadI src) shift));
12161   ins_cost(175);
12162   format %{ "sarxl   $dst, $src, $shift" %}
12163   ins_encode %{
12164     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12165   %}
12166   ins_pipe(ialu_reg_mem);
12167 %}
12168 
12169 // Logical Shift Right by 8-bit immediate
12170 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12171 %{
12172   predicate(!UseAPX);
12173   match(Set dst (URShiftI dst shift));
12174   effect(KILL cr);
12175 
12176   format %{ "shrl    $dst, $shift" %}
12177   ins_encode %{
12178     __ shrl($dst$$Register, $shift$$constant);
12179   %}
12180   ins_pipe(ialu_reg);
12181 %}
12182 
12183 // Logical Shift Right by 8-bit immediate
12184 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12185 %{
12186   predicate(UseAPX);
12187   match(Set dst (URShiftI src shift));
12188   effect(KILL cr);
12189   flag(PD::Flag_ndd_demotable_opr1);
12190 
  format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12192   ins_encode %{
12193     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12194   %}
12195   ins_pipe(ialu_reg);
12196 %}
12197 
12198 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12199 %{
12200   predicate(UseAPX);
12201   match(Set dst (URShiftI (LoadI src) shift));
12202   effect(KILL cr);
12203 
  format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12205   ins_encode %{
12206     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12207   %}
12208   ins_pipe(ialu_reg);
12209 %}
12210 
12211 // Logical Shift Right by 8-bit immediate
12212 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12213 %{
12214   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12215   effect(KILL cr);
12216 
12217   format %{ "shrl    $dst, $shift" %}
12218   ins_encode %{
12219     __ shrl($dst$$Address, $shift$$constant);
12220   %}
12221   ins_pipe(ialu_mem_imm);
12222 %}
12223 
12224 // Logical Shift Right by variable
12225 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12226 %{
12227   predicate(!VM_Version::supports_bmi2());
12228   match(Set dst (URShiftI dst shift));
12229   effect(KILL cr);
12230 
12231   format %{ "shrl    $dst, $shift" %}
12232   ins_encode %{
12233     __ shrl($dst$$Register);
12234   %}
12235   ins_pipe(ialu_reg_reg);
12236 %}
12237 
12238 // Logical Shift Right by variable
12239 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12240 %{
12241   predicate(!VM_Version::supports_bmi2());
12242   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12243   effect(KILL cr);
12244 
12245   format %{ "shrl    $dst, $shift" %}
12246   ins_encode %{
12247     __ shrl($dst$$Address);
12248   %}
12249   ins_pipe(ialu_mem_reg);
12250 %}
12251 
12252 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12253 %{
12254   predicate(VM_Version::supports_bmi2());
12255   match(Set dst (URShiftI src shift));
12256 
12257   format %{ "shrxl   $dst, $src, $shift" %}
12258   ins_encode %{
12259     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12260   %}
12261   ins_pipe(ialu_reg_reg);
12262 %}
12263 
12264 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12265 %{
12266   predicate(VM_Version::supports_bmi2());
12267   match(Set dst (URShiftI (LoadI src) shift));
12268   ins_cost(175);
12269   format %{ "shrxl   $dst, $src, $shift" %}
12270   ins_encode %{
12271     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12272   %}
12273   ins_pipe(ialu_reg_mem);
12274 %}
12275 
12276 // Long Shift Instructions
12277 // Shift Left by one, two, three
12278 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12279 %{
12280   predicate(!UseAPX);
12281   match(Set dst (LShiftL dst shift));
12282   effect(KILL cr);
12283 
12284   format %{ "salq    $dst, $shift" %}
12285   ins_encode %{
12286     __ salq($dst$$Register, $shift$$constant);
12287   %}
12288   ins_pipe(ialu_reg);
12289 %}
12290 
12291 // Shift Left by one, two, three
12292 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12293 %{
12294   predicate(UseAPX);
12295   match(Set dst (LShiftL src shift));
12296   effect(KILL cr);
12297   flag(PD::Flag_ndd_demotable_opr1);
12298 
12299   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12300   ins_encode %{
12301     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12302   %}
12303   ins_pipe(ialu_reg);
12304 %}
12305 
12306 // Shift Left by 8-bit immediate
12307 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12308 %{
12309   predicate(!UseAPX);
12310   match(Set dst (LShiftL dst shift));
12311   effect(KILL cr);
12312 
12313   format %{ "salq    $dst, $shift" %}
12314   ins_encode %{
12315     __ salq($dst$$Register, $shift$$constant);
12316   %}
12317   ins_pipe(ialu_reg);
12318 %}
12319 
12320 // Shift Left by 8-bit immediate
12321 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12322 %{
12323   predicate(UseAPX);
12324   match(Set dst (LShiftL src shift));
12325   effect(KILL cr);
12326   flag(PD::Flag_ndd_demotable_opr1);
12327 
12328   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12329   ins_encode %{
12330     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12331   %}
12332   ins_pipe(ialu_reg);
12333 %}
12334 
12335 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12336 %{
12337   predicate(UseAPX);
12338   match(Set dst (LShiftL (LoadL src) shift));
12339   effect(KILL cr);
12340 
12341   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12342   ins_encode %{
12343     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12344   %}
12345   ins_pipe(ialu_reg);
12346 %}
12347 
12348 // Shift Left by 8-bit immediate
12349 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12350 %{
12351   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12352   effect(KILL cr);
12353 
12354   format %{ "salq    $dst, $shift" %}
12355   ins_encode %{
12356     __ salq($dst$$Address, $shift$$constant);
12357   %}
12358   ins_pipe(ialu_mem_imm);
12359 %}
12360 
12361 // Shift Left by variable
12362 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12363 %{
12364   predicate(!VM_Version::supports_bmi2());
12365   match(Set dst (LShiftL dst shift));
12366   effect(KILL cr);
12367 
12368   format %{ "salq    $dst, $shift" %}
12369   ins_encode %{
12370     __ salq($dst$$Register);
12371   %}
12372   ins_pipe(ialu_reg_reg);
12373 %}
12374 
12375 // Shift Left by variable
12376 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12377 %{
12378   predicate(!VM_Version::supports_bmi2());
12379   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12380   effect(KILL cr);
12381 
12382   format %{ "salq    $dst, $shift" %}
12383   ins_encode %{
12384     __ salq($dst$$Address);
12385   %}
12386   ins_pipe(ialu_mem_reg);
12387 %}
12388 
12389 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12390 %{
12391   predicate(VM_Version::supports_bmi2());
12392   match(Set dst (LShiftL src shift));
12393 
12394   format %{ "shlxq   $dst, $src, $shift" %}
12395   ins_encode %{
12396     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12397   %}
12398   ins_pipe(ialu_reg_reg);
12399 %}
12400 
12401 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12402 %{
12403   predicate(VM_Version::supports_bmi2());
12404   match(Set dst (LShiftL (LoadL src) shift));
12405   ins_cost(175);
12406   format %{ "shlxq   $dst, $src, $shift" %}
12407   ins_encode %{
12408     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12409   %}
12410   ins_pipe(ialu_reg_mem);
12411 %}
12412 
// Arithmetic Shift Right by immediate (count taken mod 64)
12414 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12415 %{
12416   predicate(!UseAPX);
12417   match(Set dst (RShiftL dst shift));
12418   effect(KILL cr);
12419 
12420   format %{ "sarq    $dst, $shift" %}
12421   ins_encode %{
12422     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12423   %}
12424   ins_pipe(ialu_mem_imm);
12425 %}
12426 
// Arithmetic Shift Right by immediate (count taken mod 64)
12428 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12429 %{
12430   predicate(UseAPX);
12431   match(Set dst (RShiftL src shift));
12432   effect(KILL cr);
12433   flag(PD::Flag_ndd_demotable_opr1);
12434 
12435   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12436   ins_encode %{
12437     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12438   %}
12439   ins_pipe(ialu_mem_imm);
12440 %}
12441 
12442 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12443 %{
12444   predicate(UseAPX);
12445   match(Set dst (RShiftL (LoadL src) shift));
12446   effect(KILL cr);
12447 
12448   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12449   ins_encode %{
12450     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12451   %}
12452   ins_pipe(ialu_mem_imm);
12453 %}
12454 
// Arithmetic Shift Right by immediate (count taken mod 64)
12456 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12457 %{
12458   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12459   effect(KILL cr);
12460 
12461   format %{ "sarq    $dst, $shift" %}
12462   ins_encode %{
12463     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12464   %}
12465   ins_pipe(ialu_mem_imm);
12466 %}
12467 
12468 // Arithmetic Shift Right by variable
12469 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12470 %{
12471   predicate(!VM_Version::supports_bmi2());
12472   match(Set dst (RShiftL dst shift));
12473   effect(KILL cr);
12474 
12475   format %{ "sarq    $dst, $shift" %}
12476   ins_encode %{
12477     __ sarq($dst$$Register);
12478   %}
12479   ins_pipe(ialu_reg_reg);
12480 %}
12481 
12482 // Arithmetic Shift Right by variable
12483 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12484 %{
12485   predicate(!VM_Version::supports_bmi2());
12486   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12487   effect(KILL cr);
12488 
12489   format %{ "sarq    $dst, $shift" %}
12490   ins_encode %{
12491     __ sarq($dst$$Address);
12492   %}
12493   ins_pipe(ialu_mem_reg);
12494 %}
12495 
12496 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12497 %{
12498   predicate(VM_Version::supports_bmi2());
12499   match(Set dst (RShiftL src shift));
12500 
12501   format %{ "sarxq   $dst, $src, $shift" %}
12502   ins_encode %{
12503     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12504   %}
12505   ins_pipe(ialu_reg_reg);
12506 %}
12507 
12508 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12509 %{
12510   predicate(VM_Version::supports_bmi2());
12511   match(Set dst (RShiftL (LoadL src) shift));
12512   ins_cost(175);
12513   format %{ "sarxq   $dst, $src, $shift" %}
12514   ins_encode %{
12515     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12516   %}
12517   ins_pipe(ialu_reg_mem);
12518 %}
12519 
12520 // Logical Shift Right by 8-bit immediate
12521 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12522 %{
12523   predicate(!UseAPX);
12524   match(Set dst (URShiftL dst shift));
12525   effect(KILL cr);
12526 
12527   format %{ "shrq    $dst, $shift" %}
12528   ins_encode %{
12529     __ shrq($dst$$Register, $shift$$constant);
12530   %}
12531   ins_pipe(ialu_reg);
12532 %}
12533 
12534 // Logical Shift Right by 8-bit immediate
12535 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12536 %{
12537   predicate(UseAPX);
12538   match(Set dst (URShiftL src shift));
12539   effect(KILL cr);
12540   flag(PD::Flag_ndd_demotable_opr1);
12541 
12542   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12543   ins_encode %{
12544     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12545   %}
12546   ins_pipe(ialu_reg);
12547 %}
12548 
12549 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12550 %{
12551   predicate(UseAPX);
12552   match(Set dst (URShiftL (LoadL src) shift));
12553   effect(KILL cr);
12554 
12555   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12556   ins_encode %{
12557     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12558   %}
12559   ins_pipe(ialu_reg);
12560 %}
12561 
12562 // Logical Shift Right by 8-bit immediate
12563 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12564 %{
12565   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12566   effect(KILL cr);
12567 
12568   format %{ "shrq    $dst, $shift" %}
12569   ins_encode %{
12570     __ shrq($dst$$Address, $shift$$constant);
12571   %}
12572   ins_pipe(ialu_mem_imm);
12573 %}
12574 
12575 // Logical Shift Right by variable
12576 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12577 %{
12578   predicate(!VM_Version::supports_bmi2());
12579   match(Set dst (URShiftL dst shift));
12580   effect(KILL cr);
12581 
12582   format %{ "shrq    $dst, $shift" %}
12583   ins_encode %{
12584     __ shrq($dst$$Register);
12585   %}
12586   ins_pipe(ialu_reg_reg);
12587 %}
12588 
12589 // Logical Shift Right by variable
12590 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12591 %{
12592   predicate(!VM_Version::supports_bmi2());
12593   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12594   effect(KILL cr);
12595 
12596   format %{ "shrq    $dst, $shift" %}
12597   ins_encode %{
12598     __ shrq($dst$$Address);
12599   %}
12600   ins_pipe(ialu_mem_reg);
12601 %}
12602 
12603 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12604 %{
12605   predicate(VM_Version::supports_bmi2());
12606   match(Set dst (URShiftL src shift));
12607 
12608   format %{ "shrxq   $dst, $src, $shift" %}
12609   ins_encode %{
12610     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12611   %}
12612   ins_pipe(ialu_reg_reg);
12613 %}
12614 
12615 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12616 %{
12617   predicate(VM_Version::supports_bmi2());
12618   match(Set dst (URShiftL (LoadL src) shift));
12619   ins_cost(175);
12620   format %{ "shrxq   $dst, $src, $shift" %}
12621   ins_encode %{
12622     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12623   %}
12624   ins_pipe(ialu_reg_mem);
12625 %}
12626 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
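// For example, the Java-level cast 'byte b = (byte) i;' reaches the matcher
// as (i << 24) >> 24, which this rule collapses into a single sign-extending
// movsbl instead of two shifts.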
12629 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12630 %{
12631   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12632 
12633   format %{ "movsbl  $dst, $src\t# i2b" %}
12634   ins_encode %{
12635     __ movsbl($dst$$Register, $src$$Register);
12636   %}
12637   ins_pipe(ialu_reg_reg);
12638 %}
12639 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12642 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12643 %{
12644   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12645 
12646   format %{ "movswl  $dst, $src\t# i2s" %}
12647   ins_encode %{
12648     __ movswl($dst$$Register, $src$$Register);
12649   %}
12650   ins_pipe(ialu_reg_reg);
12651 %}
12652 
12653 // ROL/ROR instructions
12654 
12655 // Rotate left by constant.
12656 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12657 %{
12658   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12659   match(Set dst (RotateLeft dst shift));
12660   effect(KILL cr);
12661   format %{ "roll    $dst, $shift" %}
12662   ins_encode %{
12663     __ roll($dst$$Register, $shift$$constant);
12664   %}
12665   ins_pipe(ialu_reg);
12666 %}
12667 
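// BMI2 provides rorx but no rolx, so the constant rotate-left rules below
// emit rorx with the complementary count: rol(x, n) == ror(x, W - n) for
// word width W, with the count taken mod W.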
12668 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12669 %{
12670   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12671   match(Set dst (RotateLeft src shift));
12672   format %{ "rolxl   $dst, $src, $shift" %}
12673   ins_encode %{
12674     int shift = 32 - ($shift$$constant & 31);
12675     __ rorxl($dst$$Register, $src$$Register, shift);
12676   %}
12677   ins_pipe(ialu_reg_reg);
12678 %}
12679 
12680 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12681 %{
12682   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12683   match(Set dst (RotateLeft (LoadI src) shift));
12684   ins_cost(175);
12685   format %{ "rolxl   $dst, $src, $shift" %}
12686   ins_encode %{
12687     int shift = 32 - ($shift$$constant & 31);
12688     __ rorxl($dst$$Register, $src$$Address, shift);
12689   %}
12690   ins_pipe(ialu_reg_mem);
12691 %}
12692 
12693 // Rotate Left by variable
12694 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12695 %{
12696   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12697   match(Set dst (RotateLeft dst shift));
12698   effect(KILL cr);
12699   format %{ "roll    $dst, $shift" %}
12700   ins_encode %{
12701     __ roll($dst$$Register);
12702   %}
12703   ins_pipe(ialu_reg_reg);
12704 %}
12705 
12706 // Rotate Left by variable
12707 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12708 %{
12709   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12710   match(Set dst (RotateLeft src shift));
12711   effect(KILL cr);
12712   flag(PD::Flag_ndd_demotable_opr1);
12713 
12714   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12715   ins_encode %{
12716     __ eroll($dst$$Register, $src$$Register, false);
12717   %}
12718   ins_pipe(ialu_reg_reg);
12719 %}
12720 
12721 // Rotate Right by constant.
12722 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12723 %{
12724   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12725   match(Set dst (RotateRight dst shift));
12726   effect(KILL cr);
12727   format %{ "rorl    $dst, $shift" %}
12728   ins_encode %{
12729     __ rorl($dst$$Register, $shift$$constant);
12730   %}
12731   ins_pipe(ialu_reg);
12732 %}
12733 
12734 // Rotate Right by constant.
12735 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12736 %{
12737   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12738   match(Set dst (RotateRight src shift));
12739   format %{ "rorxl   $dst, $src, $shift" %}
12740   ins_encode %{
12741     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12742   %}
12743   ins_pipe(ialu_reg_reg);
12744 %}
12745 
12746 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12747 %{
12748   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12749   match(Set dst (RotateRight (LoadI src) shift));
12750   ins_cost(175);
12751   format %{ "rorxl   $dst, $src, $shift" %}
12752   ins_encode %{
12753     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12754   %}
12755   ins_pipe(ialu_reg_mem);
12756 %}
12757 
12758 // Rotate Right by variable
12759 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12760 %{
12761   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12762   match(Set dst (RotateRight dst shift));
12763   effect(KILL cr);
12764   format %{ "rorl    $dst, $shift" %}
12765   ins_encode %{
12766     __ rorl($dst$$Register);
12767   %}
12768   ins_pipe(ialu_reg_reg);
12769 %}
12770 
12771 // Rotate Right by variable
12772 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12773 %{
12774   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12775   match(Set dst (RotateRight src shift));
12776   effect(KILL cr);
12777   flag(PD::Flag_ndd_demotable_opr1);
12778 
  format %{ "erorl    $dst, $src, $shift\t# rotate right (int ndd)" %}
12780   ins_encode %{
12781     __ erorl($dst$$Register, $src$$Register, false);
12782   %}
12783   ins_pipe(ialu_reg_reg);
12784 %}
12785 
12786 // Rotate Left by constant.
12787 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12788 %{
12789   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12790   match(Set dst (RotateLeft dst shift));
12791   effect(KILL cr);
12792   format %{ "rolq    $dst, $shift" %}
12793   ins_encode %{
12794     __ rolq($dst$$Register, $shift$$constant);
12795   %}
12796   ins_pipe(ialu_reg);
12797 %}
12798 
12799 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12800 %{
12801   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12802   match(Set dst (RotateLeft src shift));
12803   format %{ "rolxq   $dst, $src, $shift" %}
12804   ins_encode %{
12805     int shift = 64 - ($shift$$constant & 63);
12806     __ rorxq($dst$$Register, $src$$Register, shift);
12807   %}
12808   ins_pipe(ialu_reg_reg);
12809 %}
12810 
12811 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12812 %{
12813   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12814   match(Set dst (RotateLeft (LoadL src) shift));
12815   ins_cost(175);
12816   format %{ "rolxq   $dst, $src, $shift" %}
12817   ins_encode %{
12818     int shift = 64 - ($shift$$constant & 63);
12819     __ rorxq($dst$$Register, $src$$Address, shift);
12820   %}
12821   ins_pipe(ialu_reg_mem);
12822 %}
12823 
12824 // Rotate Left by variable
12825 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12826 %{
12827   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12828   match(Set dst (RotateLeft dst shift));
12829   effect(KILL cr);
12830 
12831   format %{ "rolq    $dst, $shift" %}
12832   ins_encode %{
12833     __ rolq($dst$$Register);
12834   %}
12835   ins_pipe(ialu_reg_reg);
12836 %}
12837 
12838 // Rotate Left by variable
12839 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12840 %{
12841   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12842   match(Set dst (RotateLeft src shift));
12843   effect(KILL cr);
12844   flag(PD::Flag_ndd_demotable_opr1);
12845 
  format %{ "erolq    $dst, $src, $shift\t# rotate left (long ndd)" %}
12847   ins_encode %{
12848     __ erolq($dst$$Register, $src$$Register, false);
12849   %}
12850   ins_pipe(ialu_reg_reg);
12851 %}
12852 
12853 // Rotate Right by constant.
12854 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12855 %{
12856   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12857   match(Set dst (RotateRight dst shift));
12858   effect(KILL cr);
12859   format %{ "rorq    $dst, $shift" %}
12860   ins_encode %{
12861     __ rorq($dst$$Register, $shift$$constant);
12862   %}
12863   ins_pipe(ialu_reg);
12864 %}
12865 
12866 // Rotate Right by constant
12867 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12868 %{
12869   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12870   match(Set dst (RotateRight src shift));
12871   format %{ "rorxq   $dst, $src, $shift" %}
12872   ins_encode %{
12873     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12874   %}
12875   ins_pipe(ialu_reg_reg);
12876 %}
12877 
12878 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12879 %{
12880   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12881   match(Set dst (RotateRight (LoadL src) shift));
12882   ins_cost(175);
12883   format %{ "rorxq   $dst, $src, $shift" %}
12884   ins_encode %{
12885     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12886   %}
12887   ins_pipe(ialu_reg_mem);
12888 %}
12889 
12890 // Rotate Right by variable
12891 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12892 %{
12893   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12894   match(Set dst (RotateRight dst shift));
12895   effect(KILL cr);
12896   format %{ "rorq    $dst, $shift" %}
12897   ins_encode %{
12898     __ rorq($dst$$Register);
12899   %}
12900   ins_pipe(ialu_reg_reg);
12901 %}
12902 
12903 // Rotate Right by variable
12904 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12905 %{
12906   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12907   match(Set dst (RotateRight src shift));
12908   effect(KILL cr);
12909   flag(PD::Flag_ndd_demotable_opr1);
12910 
  format %{ "erorq    $dst, $src, $shift\t# rotate right (long ndd)" %}
12912   ins_encode %{
12913     __ erorq($dst$$Register, $src$$Register, false);
12914   %}
12915   ins_pipe(ialu_reg_reg);
12916 %}
12917 
12918 //----------------------------- CompressBits/ExpandBits ------------------------
12919 
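// A small worked example of the pext/pdep semantics: with mask = 0b1010, pext
// copies src bit 1 into dst bit 0 and src bit 3 into dst bit 1, zeroing all
// other dst bits; pdep is the inverse, scattering the low-order bits of src
// into the set-bit positions of the mask.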
12920 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12921   predicate(n->bottom_type()->isa_long());
12922   match(Set dst (CompressBits src mask));
12923   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12924   ins_encode %{
12925     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12926   %}
12927   ins_pipe( pipe_slow );
12928 %}
12929 
12930 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12931   predicate(n->bottom_type()->isa_long());
12932   match(Set dst (ExpandBits src mask));
12933   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12934   ins_encode %{
12935     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12936   %}
12937   ins_pipe( pipe_slow );
12938 %}
12939 
12940 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12941   predicate(n->bottom_type()->isa_long());
12942   match(Set dst (CompressBits src (LoadL mask)));
12943   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12944   ins_encode %{
12945     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12946   %}
12947   ins_pipe( pipe_slow );
12948 %}
12949 
12950 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12951   predicate(n->bottom_type()->isa_long());
12952   match(Set dst (ExpandBits src (LoadL mask)));
12953   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12954   ins_encode %{
12955     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12956   %}
12957   ins_pipe( pipe_slow );
12958 %}
12959 
12960 
12961 // Logical Instructions
12962 
12963 // Integer Logical Instructions
12964 
12965 // And Instructions
12966 // And Register with Register
12967 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12968 %{
12969   predicate(!UseAPX);
12970   match(Set dst (AndI dst src));
12971   effect(KILL cr);
12972   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12973 
12974   format %{ "andl    $dst, $src\t# int" %}
12975   ins_encode %{
12976     __ andl($dst$$Register, $src$$Register);
12977   %}
12978   ins_pipe(ialu_reg_reg);
12979 %}
12980 
12981 // And Register with Register using New Data Destination (NDD)
12982 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12983 %{
12984   predicate(UseAPX);
12985   match(Set dst (AndI src1 src2));
12986   effect(KILL cr);
12987   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12988 
12989   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12990   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12994   ins_pipe(ialu_reg_reg);
12995 %}
12996 
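// Masking with 0xFF or 0xFFFF is matched to a zero-extending move. movzbl and
// movzwl do not modify EFLAGS, so these rules need no effect(KILL cr).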
12997 // And Register with Immediate 255
12998 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12999 %{
13000   match(Set dst (AndI src mask));
13001 
13002   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
13003   ins_encode %{
13004     __ movzbl($dst$$Register, $src$$Register);
13005   %}
13006   ins_pipe(ialu_reg);
13007 %}
13008 
13009 // And Register with Immediate 255 and promote to long
13010 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13011 %{
13012   match(Set dst (ConvI2L (AndI src mask)));
13013 
13014   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13015   ins_encode %{
13016     __ movzbl($dst$$Register, $src$$Register);
13017   %}
13018   ins_pipe(ialu_reg);
13019 %}
13020 
13021 // And Register with Immediate 65535
13022 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13023 %{
13024   match(Set dst (AndI src mask));
13025 
13026   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13027   ins_encode %{
13028     __ movzwl($dst$$Register, $src$$Register);
13029   %}
13030   ins_pipe(ialu_reg);
13031 %}
13032 
13033 // And Register with Immediate 65535 and promote to long
13034 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13035 %{
13036   match(Set dst (ConvI2L (AndI src mask)));
13037 
13038   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13039   ins_encode %{
13040     __ movzwl($dst$$Register, $src$$Register);
13041   %}
13042   ins_pipe(ialu_reg);
13043 %}
13044 
13045 // Can skip int2long conversions after AND with small bitmask
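// Worked example (assuming mask = 0x7FF = 2^11 - 1): tmp is loaded with 11 and
// bzhiq clears every bit of src at position 11 and above, which both applies
// the mask and zero-extends the 32-bit value to 64 bits in one instruction.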
13046 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13047 %{
13048   predicate(VM_Version::supports_bmi2());
13049   ins_cost(125);
13050   effect(TEMP tmp, KILL cr);
13051   match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq   $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13053   ins_encode %{
13054     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13055     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13056   %}
13057   ins_pipe(ialu_reg_reg);
13058 %}
13059 
13060 // And Register with Immediate
13061 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13062 %{
13063   predicate(!UseAPX);
13064   match(Set dst (AndI dst src));
13065   effect(KILL cr);
13066   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13067 
13068   format %{ "andl    $dst, $src\t# int" %}
13069   ins_encode %{
13070     __ andl($dst$$Register, $src$$constant);
13071   %}
13072   ins_pipe(ialu_reg);
13073 %}
13074 
13075 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13076 %{
13077   predicate(UseAPX);
13078   match(Set dst (AndI src1 src2));
13079   effect(KILL cr);
13080   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13081 
13082   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13083   ins_encode %{
13084     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13085   %}
13086   ins_pipe(ialu_reg);
13087 %}
13088 
13089 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13090 %{
13091   predicate(UseAPX);
13092   match(Set dst (AndI (LoadI src1) src2));
13093   effect(KILL cr);
13094   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13095 
13096   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13097   ins_encode %{
13098     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13099   %}
13100   ins_pipe(ialu_reg);
13101 %}
13102 
13103 // And Register with Memory
13104 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13105 %{
13106   predicate(!UseAPX);
13107   match(Set dst (AndI dst (LoadI src)));
13108   effect(KILL cr);
13109   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13110 
13111   ins_cost(150);
13112   format %{ "andl    $dst, $src\t# int" %}
13113   ins_encode %{
13114     __ andl($dst$$Register, $src$$Address);
13115   %}
13116   ins_pipe(ialu_reg_mem);
13117 %}
13118 
13119 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13120 %{
13121   predicate(UseAPX);
13122   match(Set dst (AndI src1 (LoadI src2)));
13123   effect(KILL cr);
13124   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13125 
13126   ins_cost(150);
13127   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13128   ins_encode %{
13129     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13130   %}
13131   ins_pipe(ialu_reg_mem);
13132 %}
13133 
13134 // And Memory with Register
13135 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13136 %{
13137   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13138   effect(KILL cr);
13139   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13140 
13141   ins_cost(150);
13142   format %{ "andb    $dst, $src\t# byte" %}
13143   ins_encode %{
13144     __ andb($dst$$Address, $src$$Register);
13145   %}
13146   ins_pipe(ialu_mem_reg);
13147 %}
13148 
13149 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13150 %{
13151   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13152   effect(KILL cr);
13153   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13154 
13155   ins_cost(150);
13156   format %{ "andl    $dst, $src\t# int" %}
13157   ins_encode %{
13158     __ andl($dst$$Address, $src$$Register);
13159   %}
13160   ins_pipe(ialu_mem_reg);
13161 %}
13162 
13163 // And Memory with Immediate
13164 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13165 %{
13166   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13167   effect(KILL cr);
13168   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13169 
13170   ins_cost(125);
13171   format %{ "andl    $dst, $src\t# int" %}
13172   ins_encode %{
13173     __ andl($dst$$Address, $src$$constant);
13174   %}
13175   ins_pipe(ialu_mem_imm);
13176 %}
13177 
13178 // BMI1 instructions
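// The BMI1 rules below recognize the canonical bit-trick shapes:
//   andn:   (src1 ^ -1) & src2  == ~src1 & src2
//   blsi:   (0 - src) & src     == isolate the lowest set bit
//   blsmsk: (src + -1) ^ src    == mask up to and including the lowest set bit
//   blsr:   (src + -1) & src    == clear the lowest set bit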
13179 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13180   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13181   predicate(UseBMI1Instructions);
13182   effect(KILL cr);
13183   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13184 
13185   ins_cost(125);
13186   format %{ "andnl  $dst, $src1, $src2" %}
13187 
13188   ins_encode %{
13189     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13190   %}
13191   ins_pipe(ialu_reg_mem);
13192 %}
13193 
13194 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13195   match(Set dst (AndI (XorI src1 minus_1) src2));
13196   predicate(UseBMI1Instructions);
13197   effect(KILL cr);
13198   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13199 
13200   format %{ "andnl  $dst, $src1, $src2" %}
13201 
13202   ins_encode %{
13203     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13204   %}
13205   ins_pipe(ialu_reg);
13206 %}
13207 
13208 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13209   match(Set dst (AndI (SubI imm_zero src) src));
13210   predicate(UseBMI1Instructions);
13211   effect(KILL cr);
13212   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13213 
13214   format %{ "blsil  $dst, $src" %}
13215 
13216   ins_encode %{
13217     __ blsil($dst$$Register, $src$$Register);
13218   %}
13219   ins_pipe(ialu_reg);
13220 %}
13221 
13222 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13223   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13224   predicate(UseBMI1Instructions);
13225   effect(KILL cr);
13226   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13227 
13228   ins_cost(125);
13229   format %{ "blsil  $dst, $src" %}
13230 
13231   ins_encode %{
13232     __ blsil($dst$$Register, $src$$Address);
13233   %}
13234   ins_pipe(ialu_reg_mem);
13235 %}
13236 
13237 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13238 %{
13239   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13240   predicate(UseBMI1Instructions);
13241   effect(KILL cr);
13242   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13243 
13244   ins_cost(125);
13245   format %{ "blsmskl $dst, $src" %}
13246 
13247   ins_encode %{
13248     __ blsmskl($dst$$Register, $src$$Address);
13249   %}
13250   ins_pipe(ialu_reg_mem);
13251 %}
13252 
13253 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13254 %{
13255   match(Set dst (XorI (AddI src minus_1) src));
13256   predicate(UseBMI1Instructions);
13257   effect(KILL cr);
13258   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13259 
13260   format %{ "blsmskl $dst, $src" %}
13261 
13262   ins_encode %{
13263     __ blsmskl($dst$$Register, $src$$Register);
13264   %}
13265 
13266   ins_pipe(ialu_reg);
13267 %}
13268 
13269 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13270 %{
13271   match(Set dst (AndI (AddI src minus_1) src) );
13272   predicate(UseBMI1Instructions);
13273   effect(KILL cr);
13274   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13275 
13276   format %{ "blsrl  $dst, $src" %}
13277 
13278   ins_encode %{
13279     __ blsrl($dst$$Register, $src$$Register);
13280   %}
13281 
13282   ins_pipe(ialu_reg_mem);
13283 %}
13284 
13285 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13286 %{
13287   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13288   predicate(UseBMI1Instructions);
13289   effect(KILL cr);
13290   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13291 
13292   ins_cost(125);
13293   format %{ "blsrl  $dst, $src" %}
13294 
13295   ins_encode %{
13296     __ blsrl($dst$$Register, $src$$Address);
13297   %}
13298 
13299   ins_pipe(ialu_reg);
13300 %}
13301 
13302 // Or Instructions
13303 // Or Register with Register
13304 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13305 %{
13306   predicate(!UseAPX);
13307   match(Set dst (OrI dst src));
13308   effect(KILL cr);
13309   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13310 
13311   format %{ "orl     $dst, $src\t# int" %}
13312   ins_encode %{
13313     __ orl($dst$$Register, $src$$Register);
13314   %}
13315   ins_pipe(ialu_reg_reg);
13316 %}
13317 
13318 // Or Register with Register using New Data Destination (NDD)
13319 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13320 %{
13321   predicate(UseAPX);
13322   match(Set dst (OrI src1 src2));
13323   effect(KILL cr);
13324   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13325 
13326   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13327   ins_encode %{
13328     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13329   %}
13330   ins_pipe(ialu_reg_reg);
13331 %}
13332 
13333 // Or Register with Immediate
13334 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13335 %{
13336   predicate(!UseAPX);
13337   match(Set dst (OrI dst src));
13338   effect(KILL cr);
13339   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13340 
13341   format %{ "orl     $dst, $src\t# int" %}
13342   ins_encode %{
13343     __ orl($dst$$Register, $src$$constant);
13344   %}
13345   ins_pipe(ialu_reg);
13346 %}
13347 
13348 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13349 %{
13350   predicate(UseAPX);
13351   match(Set dst (OrI src1 src2));
13352   effect(KILL cr);
13353   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13354 
13355   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13356   ins_encode %{
13357     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13358   %}
13359   ins_pipe(ialu_reg);
13360 %}
13361 
13362 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13363 %{
13364   predicate(UseAPX);
13365   match(Set dst (OrI src1 src2));
13366   effect(KILL cr);
13367   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13368 
13369   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13370   ins_encode %{
13371     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13372   %}
13373   ins_pipe(ialu_reg);
13374 %}
13375 
13376 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13377 %{
13378   predicate(UseAPX);
13379   match(Set dst (OrI (LoadI src1) src2));
13380   effect(KILL cr);
13381   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13382 
13383   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13384   ins_encode %{
13385     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13386   %}
13387   ins_pipe(ialu_reg);
13388 %}
13389 
13390 // Or Register with Memory
13391 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13392 %{
13393   predicate(!UseAPX);
13394   match(Set dst (OrI dst (LoadI src)));
13395   effect(KILL cr);
13396   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13397 
13398   ins_cost(150);
13399   format %{ "orl     $dst, $src\t# int" %}
13400   ins_encode %{
13401     __ orl($dst$$Register, $src$$Address);
13402   %}
13403   ins_pipe(ialu_reg_mem);
13404 %}
13405 
13406 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13407 %{
13408   predicate(UseAPX);
13409   match(Set dst (OrI src1 (LoadI src2)));
13410   effect(KILL cr);
13411   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13412 
13413   ins_cost(150);
13414   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13415   ins_encode %{
13416     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13417   %}
13418   ins_pipe(ialu_reg_mem);
13419 %}
13420 
13421 // Or Memory with Register
13422 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13423 %{
13424   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13425   effect(KILL cr);
13426   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13427 
13428   ins_cost(150);
13429   format %{ "orb    $dst, $src\t# byte" %}
13430   ins_encode %{
13431     __ orb($dst$$Address, $src$$Register);
13432   %}
13433   ins_pipe(ialu_mem_reg);
13434 %}
13435 
13436 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13437 %{
13438   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13439   effect(KILL cr);
13440   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13441 
13442   ins_cost(150);
13443   format %{ "orl     $dst, $src\t# int" %}
13444   ins_encode %{
13445     __ orl($dst$$Address, $src$$Register);
13446   %}
13447   ins_pipe(ialu_mem_reg);
13448 %}
13449 
13450 // Or Memory with Immediate
13451 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13452 %{
13453   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13454   effect(KILL cr);
13455   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13456 
13457   ins_cost(125);
13458   format %{ "orl     $dst, $src\t# int" %}
13459   ins_encode %{
13460     __ orl($dst$$Address, $src$$constant);
13461   %}
13462   ins_pipe(ialu_mem_imm);
13463 %}
13464 
13465 // Xor Instructions
13466 // Xor Register with Register
13467 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13468 %{
13469   predicate(!UseAPX);
13470   match(Set dst (XorI dst src));
13471   effect(KILL cr);
13472   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13473 
13474   format %{ "xorl    $dst, $src\t# int" %}
13475   ins_encode %{
13476     __ xorl($dst$$Register, $src$$Register);
13477   %}
13478   ins_pipe(ialu_reg_reg);
13479 %}
13480 
13481 // Xor Register with Register using New Data Destination (NDD)
13482 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13483 %{
13484   predicate(UseAPX);
13485   match(Set dst (XorI src1 src2));
13486   effect(KILL cr);
13487   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13488 
13489   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13490   ins_encode %{
13491     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13492   %}
13493   ins_pipe(ialu_reg_reg);
13494 %}
13495 
13496 // Xor Register with Immediate -1
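// x ^ -1 is a bitwise NOT; notl does not modify EFLAGS, so no KILL cr effect
// is needed here.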
13497 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13498 %{
13499   predicate(!UseAPX);
13500   match(Set dst (XorI dst imm));
13501 
13502   format %{ "notl    $dst" %}
13503   ins_encode %{
13504      __ notl($dst$$Register);
13505   %}
13506   ins_pipe(ialu_reg);
13507 %}
13508 
13509 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13510 %{
13511   match(Set dst (XorI src imm));
13512   predicate(UseAPX);
13513   flag(PD::Flag_ndd_demotable_opr1);
13514 
13515   format %{ "enotl    $dst, $src" %}
13516   ins_encode %{
13517      __ enotl($dst$$Register, $src$$Register);
13518   %}
13519   ins_pipe(ialu_reg);
13520 %}
13521 
13522 // Xor Register with Immediate
13523 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13524 %{
  // Strict predicate check: when immI src is -1, xorI_rReg_im1 must be selected regardless of cost.
13526   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13527   match(Set dst (XorI dst src));
13528   effect(KILL cr);
13529   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13530 
13531   format %{ "xorl    $dst, $src\t# int" %}
13532   ins_encode %{
13533     __ xorl($dst$$Register, $src$$constant);
13534   %}
13535   ins_pipe(ialu_reg);
13536 %}
13537 
13538 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13539 %{
  // Strict predicate check: when immI src2 is -1, xorI_rReg_im1_ndd must be selected regardless of cost.
13541   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13542   match(Set dst (XorI src1 src2));
13543   effect(KILL cr);
13544   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13545 
13546   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13547   ins_encode %{
13548     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13549   %}
13550   ins_pipe(ialu_reg);
13551 %}
13552 
// Xor Memory with Immediate, result in register (NDD)
13554 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13555 %{
13556   predicate(UseAPX);
13557   match(Set dst (XorI (LoadI src1) src2));
13558   effect(KILL cr);
13559   ins_cost(150);
13560   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13561 
13562   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13563   ins_encode %{
13564     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13565   %}
13566   ins_pipe(ialu_reg);
13567 %}
13568 
13569 // Xor Register with Memory
13570 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13571 %{
13572   predicate(!UseAPX);
13573   match(Set dst (XorI dst (LoadI src)));
13574   effect(KILL cr);
13575   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13576 
13577   ins_cost(150);
13578   format %{ "xorl    $dst, $src\t# int" %}
13579   ins_encode %{
13580     __ xorl($dst$$Register, $src$$Address);
13581   %}
13582   ins_pipe(ialu_reg_mem);
13583 %}
13584 
13585 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13586 %{
13587   predicate(UseAPX);
13588   match(Set dst (XorI src1 (LoadI src2)));
13589   effect(KILL cr);
13590   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13591 
13592   ins_cost(150);
13593   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13594   ins_encode %{
13595     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13596   %}
13597   ins_pipe(ialu_reg_mem);
13598 %}
13599 
13600 // Xor Memory with Register
13601 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13602 %{
13603   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13604   effect(KILL cr);
13605   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13606 
13607   ins_cost(150);
13608   format %{ "xorb    $dst, $src\t# byte" %}
13609   ins_encode %{
13610     __ xorb($dst$$Address, $src$$Register);
13611   %}
13612   ins_pipe(ialu_mem_reg);
13613 %}
13614 
13615 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13616 %{
13617   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13618   effect(KILL cr);
13619   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13620 
13621   ins_cost(150);
13622   format %{ "xorl    $dst, $src\t# int" %}
13623   ins_encode %{
13624     __ xorl($dst$$Address, $src$$Register);
13625   %}
13626   ins_pipe(ialu_mem_reg);
13627 %}
13628 
13629 // Xor Memory with Immediate
13630 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13631 %{
13632   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13633   effect(KILL cr);
13634   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13635 
13636   ins_cost(125);
13637   format %{ "xorl    $dst, $src\t# int" %}
13638   ins_encode %{
13639     __ xorl($dst$$Address, $src$$constant);
13640   %}
13641   ins_pipe(ialu_mem_imm);
13642 %}
13643 
13644 
13645 // Long Logical Instructions
13646 
13647 // And Instructions
13648 // And Register with Register
13649 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13650 %{
13651   predicate(!UseAPX);
13652   match(Set dst (AndL dst src));
13653   effect(KILL cr);
13654   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13655 
13656   format %{ "andq    $dst, $src\t# long" %}
13657   ins_encode %{
13658     __ andq($dst$$Register, $src$$Register);
13659   %}
13660   ins_pipe(ialu_reg_reg);
13661 %}
13662 
13663 // And Register with Register using New Data Destination (NDD)
13664 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13665 %{
13666   predicate(UseAPX);
13667   match(Set dst (AndL src1 src2));
13668   effect(KILL cr);
13669   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13670 
13671   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13672   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13676   ins_pipe(ialu_reg_reg);
13677 %}
13678 
13679 // And Register with Immediate 255
13680 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13681 %{
13682   match(Set dst (AndL src mask));
13683 
13684   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13685   ins_encode %{
13686     // movzbl zeroes out the upper 32-bit and does not need REX.W
13687     __ movzbl($dst$$Register, $src$$Register);
13688   %}
13689   ins_pipe(ialu_reg);
13690 %}
13691 
13692 // And Register with Immediate 65535
13693 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13694 %{
13695   match(Set dst (AndL src mask));
13696 
13697   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13698   ins_encode %{
13699     // movzwl zeroes out the upper 32-bit and does not need REX.W
13700     __ movzwl($dst$$Register, $src$$Register);
13701   %}
13702   ins_pipe(ialu_reg);
13703 %}
13704 
13705 // And Register with Immediate
13706 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13707 %{
13708   predicate(!UseAPX);
13709   match(Set dst (AndL dst src));
13710   effect(KILL cr);
13711   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13712 
13713   format %{ "andq    $dst, $src\t# long" %}
13714   ins_encode %{
13715     __ andq($dst$$Register, $src$$constant);
13716   %}
13717   ins_pipe(ialu_reg);
13718 %}
13719 
13720 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13721 %{
13722   predicate(UseAPX);
13723   match(Set dst (AndL src1 src2));
13724   effect(KILL cr);
13725   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13726 
13727   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13728   ins_encode %{
13729     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13730   %}
13731   ins_pipe(ialu_reg);
13732 %}
13733 
13734 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13735 %{
13736   predicate(UseAPX);
13737   match(Set dst (AndL (LoadL src1) src2));
13738   effect(KILL cr);
13739   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13740 
13741   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13742   ins_encode %{
13743     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13744   %}
13745   ins_pipe(ialu_reg);
13746 %}
13747 
13748 // And Register with Memory
13749 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13750 %{
13751   predicate(!UseAPX);
13752   match(Set dst (AndL dst (LoadL src)));
13753   effect(KILL cr);
13754   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13755 
13756   ins_cost(150);
13757   format %{ "andq    $dst, $src\t# long" %}
13758   ins_encode %{
13759     __ andq($dst$$Register, $src$$Address);
13760   %}
13761   ins_pipe(ialu_reg_mem);
13762 %}
13763 
13764 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13765 %{
13766   predicate(UseAPX);
13767   match(Set dst (AndL src1 (LoadL src2)));
13768   effect(KILL cr);
13769   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13770 
13771   ins_cost(150);
13772   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13773   ins_encode %{
13774     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13775   %}
13776   ins_pipe(ialu_reg_mem);
13777 %}
13778 
13779 // And Memory with Register
13780 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13781 %{
13782   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13783   effect(KILL cr);
13784   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13785 
13786   ins_cost(150);
13787   format %{ "andq    $dst, $src\t# long" %}
13788   ins_encode %{
13789     __ andq($dst$$Address, $src$$Register);
13790   %}
13791   ins_pipe(ialu_mem_reg);
13792 %}
13793 
13794 // And Memory with Immediate
13795 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13796 %{
13797   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13798   effect(KILL cr);
13799   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13800 
13801   ins_cost(125);
13802   format %{ "andq    $dst, $src\t# long" %}
13803   ins_encode %{
13804     __ andq($dst$$Address, $src$$constant);
13805   %}
13806   ins_pipe(ialu_mem_imm);
13807 %}
13808 
13809 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13810 %{
  // con must be a genuine 64-bit immediate whose complement is a power of 2,
  // since for 8/32-bit immediates a plain AND/OR already works well enough.
13813   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13814 
13815   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13816   effect(KILL cr);
13817 
13818   ins_cost(125);
13819   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13820   ins_encode %{
13821     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13822   %}
13823   ins_pipe(ialu_mem_imm);
13824 %}
13825 
13826 // BMI1 instructions
13827 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13828   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13829   predicate(UseBMI1Instructions);
13830   effect(KILL cr);
13831   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13832 
13833   ins_cost(125);
13834   format %{ "andnq  $dst, $src1, $src2" %}
13835 
13836   ins_encode %{
13837     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13838   %}
13839   ins_pipe(ialu_reg_mem);
13840 %}
13841 
13842 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13843   match(Set dst (AndL (XorL src1 minus_1) src2));
13844   predicate(UseBMI1Instructions);
13845   effect(KILL cr);
13846   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13847 
13848   format %{ "andnq  $dst, $src1, $src2" %}
13849 
13850   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
13854 %}
13855 
13856 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13857   match(Set dst (AndL (SubL imm_zero src) src));
13858   predicate(UseBMI1Instructions);
13859   effect(KILL cr);
13860   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13861 
13862   format %{ "blsiq  $dst, $src" %}
13863 
13864   ins_encode %{
13865     __ blsiq($dst$$Register, $src$$Register);
13866   %}
13867   ins_pipe(ialu_reg);
13868 %}
13869 
13870 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13871   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13872   predicate(UseBMI1Instructions);
13873   effect(KILL cr);
13874   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13875 
13876   ins_cost(125);
13877   format %{ "blsiq  $dst, $src" %}
13878 
13879   ins_encode %{
13880     __ blsiq($dst$$Register, $src$$Address);
13881   %}
13882   ins_pipe(ialu_reg_mem);
13883 %}
13884 
13885 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13886 %{
13887   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13888   predicate(UseBMI1Instructions);
13889   effect(KILL cr);
13890   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13891 
13892   ins_cost(125);
13893   format %{ "blsmskq $dst, $src" %}
13894 
13895   ins_encode %{
13896     __ blsmskq($dst$$Register, $src$$Address);
13897   %}
13898   ins_pipe(ialu_reg_mem);
13899 %}
13900 
13901 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13902 %{
13903   match(Set dst (XorL (AddL src minus_1) src));
13904   predicate(UseBMI1Instructions);
13905   effect(KILL cr);
13906   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13907 
13908   format %{ "blsmskq $dst, $src" %}
13909 
13910   ins_encode %{
13911     __ blsmskq($dst$$Register, $src$$Register);
13912   %}
13913 
13914   ins_pipe(ialu_reg);
13915 %}
13916 
13917 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13918 %{
13919   match(Set dst (AndL (AddL src minus_1) src) );
13920   predicate(UseBMI1Instructions);
13921   effect(KILL cr);
13922   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13923 
13924   format %{ "blsrq  $dst, $src" %}
13925 
13926   ins_encode %{
13927     __ blsrq($dst$$Register, $src$$Register);
13928   %}
13929 
13930   ins_pipe(ialu_reg);
13931 %}
13932 
13933 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13934 %{
13935   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13936   predicate(UseBMI1Instructions);
13937   effect(KILL cr);
13938   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13939 
13940   ins_cost(125);
13941   format %{ "blsrq  $dst, $src" %}
13942 
13943   ins_encode %{
13944     __ blsrq($dst$$Register, $src$$Address);
13945   %}
13946 
  ins_pipe(ialu_reg_mem);
13948 %}
13949 
13950 // Or Instructions
13951 // Or Register with Register
13952 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13953 %{
13954   predicate(!UseAPX);
13955   match(Set dst (OrL dst src));
13956   effect(KILL cr);
13957   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13958 
13959   format %{ "orq     $dst, $src\t# long" %}
13960   ins_encode %{
13961     __ orq($dst$$Register, $src$$Register);
13962   %}
13963   ins_pipe(ialu_reg_reg);
13964 %}
13965 
13966 // Or Register with Register using New Data Destination (NDD)
13967 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13968 %{
13969   predicate(UseAPX);
13970   match(Set dst (OrL src1 src2));
13971   effect(KILL cr);
13972   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13973 
13974   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13975   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13979   ins_pipe(ialu_reg_reg);
13980 %}
13981 
13982 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13985   effect(KILL cr);
13986   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13987 
13988   format %{ "orq     $dst, $src\t# long" %}
13989   ins_encode %{
13990     __ orq($dst$$Register, $src$$Register);
13991   %}
13992   ins_pipe(ialu_reg_reg);
13993 %}
13994 
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13997   effect(KILL cr);
13998   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13999 
14000   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14001   ins_encode %{
14002     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14003   %}
14004   ins_pipe(ialu_reg_reg);
14005 %}
14006 
14007 // Or Register with Immediate
14008 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14009 %{
14010   predicate(!UseAPX);
14011   match(Set dst (OrL dst src));
14012   effect(KILL cr);
14013   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14014 
14015   format %{ "orq     $dst, $src\t# long" %}
14016   ins_encode %{
14017     __ orq($dst$$Register, $src$$constant);
14018   %}
14019   ins_pipe(ialu_reg);
14020 %}
14021 
14022 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14023 %{
14024   predicate(UseAPX);
14025   match(Set dst (OrL src1 src2));
14026   effect(KILL cr);
14027   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14028 
14029   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14030   ins_encode %{
14031     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14032   %}
14033   ins_pipe(ialu_reg);
14034 %}
14035 
14036 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14037 %{
14038   predicate(UseAPX);
14039   match(Set dst (OrL src1 src2));
14040   effect(KILL cr);
14041   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14042 
14043   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14044   ins_encode %{
14045     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14046   %}
14047   ins_pipe(ialu_reg);
14048 %}
14049 
// Or Memory with Immediate into Register (NDD)
14051 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14052 %{
14053   predicate(UseAPX);
14054   match(Set dst (OrL (LoadL src1) src2));
14055   effect(KILL cr);
14056   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14057 
14058   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14059   ins_encode %{
14060     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14061   %}
14062   ins_pipe(ialu_reg);
14063 %}
14064 
14065 // Or Register with Memory
14066 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14067 %{
14068   predicate(!UseAPX);
14069   match(Set dst (OrL dst (LoadL src)));
14070   effect(KILL cr);
14071   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14072 
14073   ins_cost(150);
14074   format %{ "orq     $dst, $src\t# long" %}
14075   ins_encode %{
14076     __ orq($dst$$Register, $src$$Address);
14077   %}
14078   ins_pipe(ialu_reg_mem);
14079 %}
14080 
14081 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14082 %{
14083   predicate(UseAPX);
14084   match(Set dst (OrL src1 (LoadL src2)));
14085   effect(KILL cr);
14086   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14087 
14088   ins_cost(150);
14089   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14090   ins_encode %{
14091     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14092   %}
14093   ins_pipe(ialu_reg_mem);
14094 %}
14095 
14096 // Or Memory with Register
14097 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14098 %{
14099   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14100   effect(KILL cr);
14101   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14102 
14103   ins_cost(150);
14104   format %{ "orq     $dst, $src\t# long" %}
14105   ins_encode %{
14106     __ orq($dst$$Address, $src$$Register);
14107   %}
14108   ins_pipe(ialu_mem_reg);
14109 %}
14110 
14111 // Or Memory with Immediate
14112 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14113 %{
14114   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14115   effect(KILL cr);
14116   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14117 
14118   ins_cost(125);
14119   format %{ "orq     $dst, $src\t# long" %}
14120   ins_encode %{
14121     __ orq($dst$$Address, $src$$constant);
14122   %}
14123   ins_pipe(ialu_mem_imm);
14124 %}
14125 
14126 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14127 %{
14128   // con should be a pure 64-bit power of 2 immediate
14129   // because AND/OR works well enough for 8/32-bit values.
14130   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14131 
14132   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14133   effect(KILL cr);
14134 
14135   ins_cost(125);
14136   format %{ "btsq    $dst, log2($con)\t# long" %}
14137   ins_encode %{
14138     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14139   %}
14140   ins_pipe(ialu_mem_imm);
14141 %}
14142 
14143 // Xor Instructions
14144 // Xor Register with Register
14145 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14146 %{
14147   predicate(!UseAPX);
14148   match(Set dst (XorL dst src));
14149   effect(KILL cr);
14150   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14151 
14152   format %{ "xorq    $dst, $src\t# long" %}
14153   ins_encode %{
14154     __ xorq($dst$$Register, $src$$Register);
14155   %}
14156   ins_pipe(ialu_reg_reg);
14157 %}
14158 
14159 // Xor Register with Register using New Data Destination (NDD)
14160 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14161 %{
14162   predicate(UseAPX);
14163   match(Set dst (XorL src1 src2));
14164   effect(KILL cr);
14165   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14166 
14167   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14168   ins_encode %{
14169     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14170   %}
14171   ins_pipe(ialu_reg_reg);
14172 %}
14173 
14174 // Xor Register with Immediate -1
14175 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14176 %{
14177   predicate(!UseAPX);
14178   match(Set dst (XorL dst imm));
14179 
14180   format %{ "notq   $dst" %}
14181   ins_encode %{
14182      __ notq($dst$$Register);
14183   %}
14184   ins_pipe(ialu_reg);
14185 %}
14186 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14188 %{
14189   predicate(UseAPX);
14190   match(Set dst (XorL src imm));
14191   flag(PD::Flag_ndd_demotable_opr1);
14192 
14193   format %{ "enotq   $dst, $src" %}
14194   ins_encode %{
14195     __ enotq($dst$$Register, $src$$Register);
14196   %}
14197   ins_pipe(ialu_reg);
14198 %}
14199 
14200 // Xor Register with Immediate
14201 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14202 %{
  // Strict predicate: exclude -1 so that xorL_rReg_im1 is selected for it regardless of cost.
14204   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14205   match(Set dst (XorL dst src));
14206   effect(KILL cr);
14207   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14208 
14209   format %{ "xorq    $dst, $src\t# long" %}
14210   ins_encode %{
14211     __ xorq($dst$$Register, $src$$constant);
14212   %}
14213   ins_pipe(ialu_reg);
14214 %}
14215 
14216 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14217 %{
  // Strict predicate: exclude -1 so that xorL_rReg_im1_ndd is selected for it regardless of cost.
14219   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14220   match(Set dst (XorL src1 src2));
14221   effect(KILL cr);
14222   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14223 
14224   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14225   ins_encode %{
14226     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14227   %}
14228   ins_pipe(ialu_reg);
14229 %}
14230 
// Xor Memory with Immediate into Register (NDD)
14232 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14233 %{
14234   predicate(UseAPX);
14235   match(Set dst (XorL (LoadL src1) src2));
14236   effect(KILL cr);
14237   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14238   ins_cost(150);
14239 
14240   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14241   ins_encode %{
14242     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14243   %}
14244   ins_pipe(ialu_reg);
14245 %}
14246 
14247 // Xor Register with Memory
14248 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14249 %{
14250   predicate(!UseAPX);
14251   match(Set dst (XorL dst (LoadL src)));
14252   effect(KILL cr);
14253   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14254 
14255   ins_cost(150);
14256   format %{ "xorq    $dst, $src\t# long" %}
14257   ins_encode %{
14258     __ xorq($dst$$Register, $src$$Address);
14259   %}
14260   ins_pipe(ialu_reg_mem);
14261 %}
14262 
14263 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14264 %{
14265   predicate(UseAPX);
14266   match(Set dst (XorL src1 (LoadL src2)));
14267   effect(KILL cr);
14268   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14269 
14270   ins_cost(150);
14271   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14272   ins_encode %{
14273     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14274   %}
14275   ins_pipe(ialu_reg_mem);
14276 %}
14277 
14278 // Xor Memory with Register
14279 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14280 %{
14281   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14282   effect(KILL cr);
14283   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14284 
14285   ins_cost(150);
14286   format %{ "xorq    $dst, $src\t# long" %}
14287   ins_encode %{
14288     __ xorq($dst$$Address, $src$$Register);
14289   %}
14290   ins_pipe(ialu_mem_reg);
14291 %}
14292 
14293 // Xor Memory with Immediate
14294 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14295 %{
14296   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14297   effect(KILL cr);
14298   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14299 
14300   ins_cost(125);
14301   format %{ "xorq    $dst, $src\t# long" %}
14302   ins_encode %{
14303     __ xorq($dst$$Address, $src$$constant);
14304   %}
14305   ins_pipe(ialu_mem_imm);
14306 %}
14307 
14308 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14309 %{
14310   match(Set dst (CmpLTMask p q));
14311   effect(KILL cr);
14312 
14313   ins_cost(400);
14314   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14315             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14316             "negl    $dst" %}
14317   ins_encode %{
14318     __ cmpl($p$$Register, $q$$Register);
14319     __ setcc(Assembler::less, $dst$$Register);
14320     __ negl($dst$$Register);
14321   %}
14322   ins_pipe(pipe_slow);
14323 %}
14324 
14325 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14326 %{
14327   match(Set dst (CmpLTMask dst zero));
14328   effect(KILL cr);
14329 
14330   ins_cost(100);
14331   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14332   ins_encode %{
14333     __ sarl($dst$$Register, 31);
14334   %}
14335   ins_pipe(ialu_reg);
14336 %}
14337 
// Better to save a register than to avoid a branch.
14339 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14340 %{
14341   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14342   effect(KILL cr);
14343   ins_cost(300);
14344   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14345             "jge     done\n\t"
14346             "addl    $p,$y\n"
14347             "done:   " %}
14348   ins_encode %{
14349     Register Rp = $p$$Register;
14350     Register Rq = $q$$Register;
14351     Register Ry = $y$$Register;
14352     Label done;
14353     __ subl(Rp, Rq);
14354     __ jccb(Assembler::greaterEqual, done);
14355     __ addl(Rp, Ry);
14356     __ bind(done);
14357   %}
14358   ins_pipe(pipe_cmplt);
14359 %}
14360 
// Better to save a register than to avoid a branch.
14362 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14363 %{
14364   match(Set y (AndI (CmpLTMask p q) y));
14365   effect(KILL cr);
14366 
14367   ins_cost(300);
14368 
14369   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14370             "jlt     done\n\t"
14371             "xorl    $y, $y\n"
14372             "done:   " %}
14373   ins_encode %{
14374     Register Rp = $p$$Register;
14375     Register Rq = $q$$Register;
14376     Register Ry = $y$$Register;
14377     Label done;
14378     __ cmpl(Rp, Rq);
14379     __ jccb(Assembler::less, done);
14380     __ xorl(Ry, Ry);
14381     __ bind(done);
14382   %}
14383   ins_pipe(pipe_cmplt);
14384 %}
14385 
14386 
14387 //---------- FP Instructions------------------------------------------------
14388 
14389 // Really expensive, avoid
14390 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14391 %{
14392   match(Set cr (CmpF src1 src2));
14393 
14394   ins_cost(500);
14395   format %{ "ucomiss $src1, $src2\n\t"
14396             "jnp,s   exit\n\t"
14397             "pushfq\t# saw NaN, set CF\n\t"
14398             "andq    [rsp], #0xffffff2b\n\t"
14399             "popfq\n"
14400     "exit:" %}
14401   ins_encode %{
14402     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14403     emit_cmpfp_fixup(masm);
14404   %}
14405   ins_pipe(pipe_slow);
14406 %}
14407 
14408 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14409   match(Set cr (CmpF src1 src2));
14410 
14411   ins_cost(100);
14412   format %{ "ucomiss $src1, $src2" %}
14413   ins_encode %{
14414     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14415   %}
14416   ins_pipe(pipe_slow);
14417 %}
14418 
14419 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14420   match(Set cr (CmpF src1 (LoadF src2)));
14421 
14422   ins_cost(100);
14423   format %{ "ucomiss $src1, $src2" %}
14424   ins_encode %{
14425     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14426   %}
14427   ins_pipe(pipe_slow);
14428 %}
14429 
14430 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14431   match(Set cr (CmpF src con));
14432   ins_cost(100);
14433   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14434   ins_encode %{
14435     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14436   %}
14437   ins_pipe(pipe_slow);
14438 %}
14439 
14440 // Really expensive, avoid
14441 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14442 %{
14443   match(Set cr (CmpD src1 src2));
14444 
14445   ins_cost(500);
14446   format %{ "ucomisd $src1, $src2\n\t"
14447             "jnp,s   exit\n\t"
14448             "pushfq\t# saw NaN, set CF\n\t"
14449             "andq    [rsp], #0xffffff2b\n\t"
14450             "popfq\n"
14451     "exit:" %}
14452   ins_encode %{
14453     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14454     emit_cmpfp_fixup(masm);
14455   %}
14456   ins_pipe(pipe_slow);
14457 %}
14458 
14459 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14460   match(Set cr (CmpD src1 src2));
14461 
14462   ins_cost(100);
14463   format %{ "ucomisd $src1, $src2 test" %}
14464   ins_encode %{
14465     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14466   %}
14467   ins_pipe(pipe_slow);
14468 %}
14469 
14470 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14471   match(Set cr (CmpD src1 (LoadD src2)));
14472 
14473   ins_cost(100);
14474   format %{ "ucomisd $src1, $src2" %}
14475   ins_encode %{
14476     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14477   %}
14478   ins_pipe(pipe_slow);
14479 %}
14480 
14481 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14482   match(Set cr (CmpD src con));
14483   ins_cost(100);
14484   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14485   ins_encode %{
14486     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14487   %}
14488   ins_pipe(pipe_slow);
14489 %}
14490 
14491 // Compare into -1,0,1
14492 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14493 %{
14494   match(Set dst (CmpF3 src1 src2));
14495   effect(KILL cr);
14496 
14497   ins_cost(275);
14498   format %{ "ucomiss $src1, $src2\n\t"
14499             "movl    $dst, #-1\n\t"
14500             "jp,s    done\n\t"
14501             "jb,s    done\n\t"
14502             "setne   $dst\n\t"
14503             "movzbl  $dst, $dst\n"
14504     "done:" %}
14505   ins_encode %{
14506     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14507     emit_cmpfp3(masm, $dst$$Register);
14508   %}
14509   ins_pipe(pipe_slow);
14510 %}
14511 
14512 // Compare into -1,0,1
14513 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14514 %{
14515   match(Set dst (CmpF3 src1 (LoadF src2)));
14516   effect(KILL cr);
14517 
14518   ins_cost(275);
14519   format %{ "ucomiss $src1, $src2\n\t"
14520             "movl    $dst, #-1\n\t"
14521             "jp,s    done\n\t"
14522             "jb,s    done\n\t"
14523             "setne   $dst\n\t"
14524             "movzbl  $dst, $dst\n"
14525     "done:" %}
14526   ins_encode %{
14527     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14528     emit_cmpfp3(masm, $dst$$Register);
14529   %}
14530   ins_pipe(pipe_slow);
14531 %}
14532 
14533 // Compare into -1,0,1
14534 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14535   match(Set dst (CmpF3 src con));
14536   effect(KILL cr);
14537 
14538   ins_cost(275);
14539   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14540             "movl    $dst, #-1\n\t"
14541             "jp,s    done\n\t"
14542             "jb,s    done\n\t"
14543             "setne   $dst\n\t"
14544             "movzbl  $dst, $dst\n"
14545     "done:" %}
14546   ins_encode %{
14547     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14548     emit_cmpfp3(masm, $dst$$Register);
14549   %}
14550   ins_pipe(pipe_slow);
14551 %}
14552 
14553 // Compare into -1,0,1
14554 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14555 %{
14556   match(Set dst (CmpD3 src1 src2));
14557   effect(KILL cr);
14558 
14559   ins_cost(275);
14560   format %{ "ucomisd $src1, $src2\n\t"
14561             "movl    $dst, #-1\n\t"
14562             "jp,s    done\n\t"
14563             "jb,s    done\n\t"
14564             "setne   $dst\n\t"
14565             "movzbl  $dst, $dst\n"
14566     "done:" %}
14567   ins_encode %{
14568     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14569     emit_cmpfp3(masm, $dst$$Register);
14570   %}
14571   ins_pipe(pipe_slow);
14572 %}
14573 
14574 // Compare into -1,0,1
14575 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14576 %{
14577   match(Set dst (CmpD3 src1 (LoadD src2)));
14578   effect(KILL cr);
14579 
14580   ins_cost(275);
14581   format %{ "ucomisd $src1, $src2\n\t"
14582             "movl    $dst, #-1\n\t"
14583             "jp,s    done\n\t"
14584             "jb,s    done\n\t"
14585             "setne   $dst\n\t"
14586             "movzbl  $dst, $dst\n"
14587     "done:" %}
14588   ins_encode %{
14589     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14590     emit_cmpfp3(masm, $dst$$Register);
14591   %}
14592   ins_pipe(pipe_slow);
14593 %}
14594 
14595 // Compare into -1,0,1
14596 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14597   match(Set dst (CmpD3 src con));
14598   effect(KILL cr);
14599 
14600   ins_cost(275);
14601   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14602             "movl    $dst, #-1\n\t"
14603             "jp,s    done\n\t"
14604             "jb,s    done\n\t"
14605             "setne   $dst\n\t"
14606             "movzbl  $dst, $dst\n"
14607     "done:" %}
14608   ins_encode %{
14609     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14610     emit_cmpfp3(masm, $dst$$Register);
14611   %}
14612   ins_pipe(pipe_slow);
14613 %}
14614 
14615 //----------Arithmetic Conversion Instructions---------------------------------
14616 
14617 instruct convF2D_reg_reg(regD dst, regF src)
14618 %{
14619   match(Set dst (ConvF2D src));
14620 
14621   format %{ "cvtss2sd $dst, $src" %}
14622   ins_encode %{
14623     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14624   %}
14625   ins_pipe(pipe_slow); // XXX
14626 %}
14627 
14628 instruct convF2D_reg_mem(regD dst, memory src)
14629 %{
14630   predicate(UseAVX == 0);
14631   match(Set dst (ConvF2D (LoadF src)));
14632 
14633   format %{ "cvtss2sd $dst, $src" %}
14634   ins_encode %{
14635     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14636   %}
14637   ins_pipe(pipe_slow); // XXX
14638 %}
14639 
14640 instruct convD2F_reg_reg(regF dst, regD src)
14641 %{
14642   match(Set dst (ConvD2F src));
14643 
14644   format %{ "cvtsd2ss $dst, $src" %}
14645   ins_encode %{
14646     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14647   %}
14648   ins_pipe(pipe_slow); // XXX
14649 %}
14650 
14651 instruct convD2F_reg_mem(regF dst, memory src)
14652 %{
14653   predicate(UseAVX == 0);
14654   match(Set dst (ConvD2F (LoadD src)));
14655 
14656   format %{ "cvtsd2ss $dst, $src" %}
14657   ins_encode %{
14658     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14659   %}
14660   ins_pipe(pipe_slow); // XXX
14661 %}
14662 
14663 // XXX do mem variants
14664 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14665 %{
14666   predicate(!VM_Version::supports_avx10_2());
14667   match(Set dst (ConvF2I src));
14668   effect(KILL cr);
14669   format %{ "convert_f2i $dst, $src" %}
14670   ins_encode %{
14671     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14672   %}
14673   ins_pipe(pipe_slow);
14674 %}
14675 
14676 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14677 %{
14678   predicate(VM_Version::supports_avx10_2());
14679   match(Set dst (ConvF2I src));
14680   format %{ "evcvttss2sisl $dst, $src" %}
14681   ins_encode %{
14682     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14683   %}
14684   ins_pipe(pipe_slow);
14685 %}
14686 
14687 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14688 %{
14689   predicate(VM_Version::supports_avx10_2());
14690   match(Set dst (ConvF2I (LoadF src)));
14691   format %{ "evcvttss2sisl $dst, $src" %}
14692   ins_encode %{
14693     __ evcvttss2sisl($dst$$Register, $src$$Address);
14694   %}
14695   ins_pipe(pipe_slow);
14696 %}
14697 
14698 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14699 %{
14700   predicate(!VM_Version::supports_avx10_2());
14701   match(Set dst (ConvF2L src));
14702   effect(KILL cr);
14703   format %{ "convert_f2l $dst, $src"%}
14704   ins_encode %{
14705     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14706   %}
14707   ins_pipe(pipe_slow);
14708 %}
14709 
14710 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14711 %{
14712   predicate(VM_Version::supports_avx10_2());
14713   match(Set dst (ConvF2L src));
14714   format %{ "evcvttss2sisq $dst, $src" %}
14715   ins_encode %{
14716     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14717   %}
14718   ins_pipe(pipe_slow);
14719 %}
14720 
14721 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14722 %{
14723   predicate(VM_Version::supports_avx10_2());
14724   match(Set dst (ConvF2L (LoadF src)));
14725   format %{ "evcvttss2sisq $dst, $src" %}
14726   ins_encode %{
14727     __ evcvttss2sisq($dst$$Register, $src$$Address);
14728   %}
14729   ins_pipe(pipe_slow);
14730 %}
14731 
14732 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14733 %{
14734   predicate(!VM_Version::supports_avx10_2());
14735   match(Set dst (ConvD2I src));
14736   effect(KILL cr);
14737   format %{ "convert_d2i $dst, $src"%}
14738   ins_encode %{
14739     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14740   %}
14741   ins_pipe(pipe_slow);
14742 %}
14743 
14744 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14745 %{
14746   predicate(VM_Version::supports_avx10_2());
14747   match(Set dst (ConvD2I src));
14748   format %{ "evcvttsd2sisl $dst, $src" %}
14749   ins_encode %{
14750     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14751   %}
14752   ins_pipe(pipe_slow);
14753 %}
14754 
14755 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14756 %{
14757   predicate(VM_Version::supports_avx10_2());
14758   match(Set dst (ConvD2I (LoadD src)));
14759   format %{ "evcvttsd2sisl $dst, $src" %}
14760   ins_encode %{
14761     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14762   %}
14763   ins_pipe(pipe_slow);
14764 %}
14765 
14766 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14767 %{
14768   predicate(!VM_Version::supports_avx10_2());
14769   match(Set dst (ConvD2L src));
14770   effect(KILL cr);
14771   format %{ "convert_d2l $dst, $src"%}
14772   ins_encode %{
14773     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14774   %}
14775   ins_pipe(pipe_slow);
14776 %}
14777 
14778 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14779 %{
14780   predicate(VM_Version::supports_avx10_2());
14781   match(Set dst (ConvD2L src));
14782   format %{ "evcvttsd2sisq $dst, $src" %}
14783   ins_encode %{
14784     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14785   %}
14786   ins_pipe(pipe_slow);
14787 %}
14788 
14789 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14790 %{
14791   predicate(VM_Version::supports_avx10_2());
14792   match(Set dst (ConvD2L (LoadD src)));
14793   format %{ "evcvttsd2sisq $dst, $src" %}
14794   ins_encode %{
14795     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14796   %}
14797   ins_pipe(pipe_slow);
14798 %}
14799 
14800 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14801 %{
14802   match(Set dst (RoundD src));
14803   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14804   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14805   ins_encode %{
14806     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14807   %}
14808   ins_pipe(pipe_slow);
14809 %}
14810 
14811 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14812 %{
14813   match(Set dst (RoundF src));
14814   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14815   format %{ "round_float $dst,$src" %}
14816   ins_encode %{
14817     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14818   %}
14819   ins_pipe(pipe_slow);
14820 %}
14821 
14822 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14823 %{
14824   predicate(!UseXmmI2F);
14825   match(Set dst (ConvI2F src));
14826 
14827   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14828   ins_encode %{
14829     if (UseAVX > 0) {
14830       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14831     }
14832     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14833   %}
14834   ins_pipe(pipe_slow); // XXX
14835 %}
14836 
14837 instruct convI2F_reg_mem(regF dst, memory src)
14838 %{
14839   predicate(UseAVX == 0);
14840   match(Set dst (ConvI2F (LoadI src)));
14841 
14842   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14843   ins_encode %{
14844     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14845   %}
14846   ins_pipe(pipe_slow); // XXX
14847 %}
14848 
14849 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14850 %{
14851   predicate(!UseXmmI2D);
14852   match(Set dst (ConvI2D src));
14853 
14854   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14855   ins_encode %{
14856     if (UseAVX > 0) {
14857       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14858     }
14859     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14860   %}
14861   ins_pipe(pipe_slow); // XXX
14862 %}
14863 
14864 instruct convI2D_reg_mem(regD dst, memory src)
14865 %{
14866   predicate(UseAVX == 0);
14867   match(Set dst (ConvI2D (LoadI src)));
14868 
14869   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14870   ins_encode %{
14871     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14872   %}
14873   ins_pipe(pipe_slow); // XXX
14874 %}
14875 
14876 instruct convXI2F_reg(regF dst, rRegI src)
14877 %{
14878   predicate(UseXmmI2F);
14879   match(Set dst (ConvI2F src));
14880 
14881   format %{ "movdl $dst, $src\n\t"
14882             "cvtdq2psl $dst, $dst\t# i2f" %}
14883   ins_encode %{
14884     __ movdl($dst$$XMMRegister, $src$$Register);
14885     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14886   %}
14887   ins_pipe(pipe_slow); // XXX
14888 %}
14889 
14890 instruct convXI2D_reg(regD dst, rRegI src)
14891 %{
14892   predicate(UseXmmI2D);
14893   match(Set dst (ConvI2D src));
14894 
14895   format %{ "movdl $dst, $src\n\t"
14896             "cvtdq2pdl $dst, $dst\t# i2d" %}
14897   ins_encode %{
14898     __ movdl($dst$$XMMRegister, $src$$Register);
14899     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14900   %}
14901   ins_pipe(pipe_slow); // XXX
14902 %}
14903 
14904 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14905 %{
14906   match(Set dst (ConvL2F src));
14907 
14908   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14909   ins_encode %{
14910     if (UseAVX > 0) {
14911       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14912     }
14913     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14914   %}
14915   ins_pipe(pipe_slow); // XXX
14916 %}
14917 
14918 instruct convL2F_reg_mem(regF dst, memory src)
14919 %{
14920   predicate(UseAVX == 0);
14921   match(Set dst (ConvL2F (LoadL src)));
14922 
14923   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14924   ins_encode %{
14925     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14926   %}
14927   ins_pipe(pipe_slow); // XXX
14928 %}
14929 
14930 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14931 %{
14932   match(Set dst (ConvL2D src));
14933 
14934   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14935   ins_encode %{
14936     if (UseAVX > 0) {
14937       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14938     }
14939     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14940   %}
14941   ins_pipe(pipe_slow); // XXX
14942 %}
14943 
14944 instruct convL2D_reg_mem(regD dst, memory src)
14945 %{
14946   predicate(UseAVX == 0);
14947   match(Set dst (ConvL2D (LoadL src)));
14948 
14949   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14950   ins_encode %{
14951     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14952   %}
14953   ins_pipe(pipe_slow); // XXX
14954 %}
14955 
14956 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14957 %{
14958   match(Set dst (ConvI2L src));
14959 
14960   ins_cost(125);
14961   format %{ "movslq  $dst, $src\t# i2l" %}
14962   ins_encode %{
14963     __ movslq($dst$$Register, $src$$Register);
14964   %}
14965   ins_pipe(ialu_reg_reg);
14966 %}
14967 
14968 // Zero-extend convert int to long
14969 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14970 %{
14971   match(Set dst (AndL (ConvI2L src) mask));
14972 
14973   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14974   ins_encode %{
14975     if ($dst$$reg != $src$$reg) {
14976       __ movl($dst$$Register, $src$$Register);
14977     }
14978   %}
14979   ins_pipe(ialu_reg_reg);
14980 %}
14981 
14982 // Zero-extend convert int to long
14983 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14984 %{
14985   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14986 
14987   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14988   ins_encode %{
14989     __ movl($dst$$Register, $src$$Address);
14990   %}
14991   ins_pipe(ialu_reg_mem);
14992 %}
14993 
14994 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14995 %{
14996   match(Set dst (AndL src mask));
14997 
14998   format %{ "movl    $dst, $src\t# zero-extend long" %}
14999   ins_encode %{
15000     __ movl($dst$$Register, $src$$Register);
15001   %}
15002   ins_pipe(ialu_reg_reg);
15003 %}
15004 
15005 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15006 %{
15007   match(Set dst (ConvL2I src));
15008 
15009   format %{ "movl    $dst, $src\t# l2i" %}
15010   ins_encode %{
15011     __ movl($dst$$Register, $src$$Register);
15012   %}
15013   ins_pipe(ialu_reg_reg);
15014 %}
15015 
15016 
15017 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15018   match(Set dst (MoveF2I src));
15019   effect(DEF dst, USE src);
15020 
15021   ins_cost(125);
15022   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15023   ins_encode %{
15024     __ movl($dst$$Register, Address(rsp, $src$$disp));
15025   %}
15026   ins_pipe(ialu_reg_mem);
15027 %}
15028 
15029 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15030   match(Set dst (MoveI2F src));
15031   effect(DEF dst, USE src);
15032 
15033   ins_cost(125);
15034   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15035   ins_encode %{
15036     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15037   %}
15038   ins_pipe(pipe_slow);
15039 %}
15040 
15041 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15042   match(Set dst (MoveD2L src));
15043   effect(DEF dst, USE src);
15044 
15045   ins_cost(125);
15046   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15047   ins_encode %{
15048     __ movq($dst$$Register, Address(rsp, $src$$disp));
15049   %}
15050   ins_pipe(ialu_reg_mem);
15051 %}
15052 
15053 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15054   predicate(!UseXmmLoadAndClearUpper);
15055   match(Set dst (MoveL2D src));
15056   effect(DEF dst, USE src);
15057 
15058   ins_cost(125);
15059   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15060   ins_encode %{
15061     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15062   %}
15063   ins_pipe(pipe_slow);
15064 %}
15065 
15066 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15067   predicate(UseXmmLoadAndClearUpper);
15068   match(Set dst (MoveL2D src));
15069   effect(DEF dst, USE src);
15070 
15071   ins_cost(125);
15072   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15073   ins_encode %{
15074     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15075   %}
15076   ins_pipe(pipe_slow);
15077 %}
15078 
15079 
15080 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15081   match(Set dst (MoveF2I src));
15082   effect(DEF dst, USE src);
15083 
15084   ins_cost(95); // XXX
15085   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15086   ins_encode %{
15087     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15088   %}
15089   ins_pipe(pipe_slow);
15090 %}
15091 
15092 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15093   match(Set dst (MoveI2F src));
15094   effect(DEF dst, USE src);
15095 
15096   ins_cost(100);
15097   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15098   ins_encode %{
15099     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15100   %}
  ins_pipe(ialu_mem_reg);
15102 %}
15103 
15104 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15105   match(Set dst (MoveD2L src));
15106   effect(DEF dst, USE src);
15107 
15108   ins_cost(95); // XXX
15109   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
15110   ins_encode %{
15111     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15112   %}
15113   ins_pipe(pipe_slow);
15114 %}
15115 
15116 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15117   match(Set dst (MoveL2D src));
15118   effect(DEF dst, USE src);
15119 
15120   ins_cost(100);
15121   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15122   ins_encode %{
15123     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15124   %}
15125   ins_pipe(ialu_mem_reg);
15126 %}
15127 
15128 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15129   match(Set dst (MoveF2I src));
15130   effect(DEF dst, USE src);
15131   ins_cost(85);
15132   format %{ "movd    $dst,$src\t# MoveF2I" %}
15133   ins_encode %{
15134     __ movdl($dst$$Register, $src$$XMMRegister);
15135   %}
  ins_pipe(pipe_slow);
15137 %}
15138 
15139 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15140   match(Set dst (MoveD2L src));
15141   effect(DEF dst, USE src);
15142   ins_cost(85);
15143   format %{ "movd    $dst,$src\t# MoveD2L" %}
15144   ins_encode %{
15145     __ movdq($dst$$Register, $src$$XMMRegister);
15146   %}
  ins_pipe(pipe_slow);
15148 %}
15149 
15150 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15151   match(Set dst (MoveI2F src));
15152   effect(DEF dst, USE src);
15153   ins_cost(100);
15154   format %{ "movd    $dst,$src\t# MoveI2F" %}
15155   ins_encode %{
15156     __ movdl($dst$$XMMRegister, $src$$Register);
15157   %}
  ins_pipe(pipe_slow);
15159 %}
15160 
15161 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15162   match(Set dst (MoveL2D src));
15163   effect(DEF dst, USE src);
15164   ins_cost(100);
15165   format %{ "movd    $dst,$src\t# MoveL2D" %}
15166   ins_encode %{
15167      __ movdq($dst$$XMMRegister, $src$$Register);
15168   %}
  ins_pipe(pipe_slow);
15170 %}
15171 
15172 
15173 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15175 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15176                   Universe dummy, rFlagsReg cr)
15177 %{
15178   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15179   match(Set dummy (ClearArray (Binary cnt base) val));
15180   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15181 
15182   format %{ $$template
15183     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15184     $$emit$$"jg      LARGE\n\t"
15185     $$emit$$"dec     rcx\n\t"
15186     $$emit$$"js      DONE\t# Zero length\n\t"
15187     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15188     $$emit$$"dec     rcx\n\t"
15189     $$emit$$"jge     LOOP\n\t"
15190     $$emit$$"jmp     DONE\n\t"
15191     $$emit$$"# LARGE:\n\t"
15192     if (UseFastStosb) {
15193        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15194        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15195     } else if (UseXMMForObjInit) {
15196        $$emit$$"movdq   $tmp, $val\n\t"
15197        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15198        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15199        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15200        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15201        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15202        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15203        $$emit$$"add     0x40,rax\n\t"
15204        $$emit$$"# L_zero_64_bytes:\n\t"
15205        $$emit$$"sub     0x8,rcx\n\t"
15206        $$emit$$"jge     L_loop\n\t"
15207        $$emit$$"add     0x4,rcx\n\t"
15208        $$emit$$"jl      L_tail\n\t"
15209        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15210        $$emit$$"add     0x20,rax\n\t"
15211        $$emit$$"sub     0x4,rcx\n\t"
15212        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15213        $$emit$$"add     0x4,rcx\n\t"
15214        $$emit$$"jle     L_end\n\t"
15215        $$emit$$"dec     rcx\n\t"
15216        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15217        $$emit$$"vmovq   xmm0,(rax)\n\t"
15218        $$emit$$"add     0x8,rax\n\t"
15219        $$emit$$"dec     rcx\n\t"
15220        $$emit$$"jge     L_sloop\n\t"
15221        $$emit$$"# L_end:\n\t"
15222     } else {
15223        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15224     }
15225     $$emit$$"# DONE"
15226   %}
15227   ins_encode %{
15228     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15229                  $tmp$$XMMRegister, false, false);
15230   %}
15231   ins_pipe(pipe_slow);
15232 %}
15233 
15234 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15235                             Universe dummy, rFlagsReg cr)
15236 %{
15237   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15238   match(Set dummy (ClearArray (Binary cnt base) val));
15239   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15240 
15241   format %{ $$template
15242     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15243     $$emit$$"jg      LARGE\n\t"
15244     $$emit$$"dec     rcx\n\t"
15245     $$emit$$"js      DONE\t# Zero length\n\t"
15246     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15247     $$emit$$"dec     rcx\n\t"
15248     $$emit$$"jge     LOOP\n\t"
15249     $$emit$$"jmp     DONE\n\t"
15250     $$emit$$"# LARGE:\n\t"
15251     if (UseXMMForObjInit) {
15252        $$emit$$"movdq   $tmp, $val\n\t"
15253        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15254        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15255        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15256        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15257        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15258        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15259        $$emit$$"add     0x40,rax\n\t"
15260        $$emit$$"# L_zero_64_bytes:\n\t"
15261        $$emit$$"sub     0x8,rcx\n\t"
15262        $$emit$$"jge     L_loop\n\t"
15263        $$emit$$"add     0x4,rcx\n\t"
15264        $$emit$$"jl      L_tail\n\t"
15265        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15266        $$emit$$"add     0x20,rax\n\t"
15267        $$emit$$"sub     0x4,rcx\n\t"
15268        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15269        $$emit$$"add     0x4,rcx\n\t"
15270        $$emit$$"jle     L_end\n\t"
15271        $$emit$$"dec     rcx\n\t"
15272        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15273        $$emit$$"vmovq   xmm0,(rax)\n\t"
15274        $$emit$$"add     0x8,rax\n\t"
15275        $$emit$$"dec     rcx\n\t"
15276        $$emit$$"jge     L_sloop\n\t"
15277        $$emit$$"# L_end:\n\t"
15278     } else {
15279        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15280     }
15281     $$emit$$"# DONE"
15282   %}
15283   ins_encode %{
15284     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15285                  $tmp$$XMMRegister, false, true);
15286   %}
15287   ins_pipe(pipe_slow);
15288 %}
15289 
15290 // Small non-constant length ClearArray for AVX512 targets.
15291 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15292                        Universe dummy, rFlagsReg cr)
15293 %{
15294   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15295   match(Set dummy (ClearArray (Binary cnt base) val));
15296   ins_cost(125);
15297   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15298 
15299   format %{ $$template
15300     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15301     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15302     $$emit$$"jg      LARGE\n\t"
15303     $$emit$$"dec     rcx\n\t"
15304     $$emit$$"js      DONE\t# Zero length\n\t"
15305     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15306     $$emit$$"dec     rcx\n\t"
15307     $$emit$$"jge     LOOP\n\t"
15308     $$emit$$"jmp     DONE\n\t"
15309     $$emit$$"# LARGE:\n\t"
15310     if (UseFastStosb) {
15311        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15312        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15313     } else if (UseXMMForObjInit) {
15314        $$emit$$"mov     rdi,rax\n\t"
15315        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15316        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15317        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15318        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15319        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15320        $$emit$$"add     0x40,rax\n\t"
15321        $$emit$$"# L_zero_64_bytes:\n\t"
15322        $$emit$$"sub     0x8,rcx\n\t"
15323        $$emit$$"jge     L_loop\n\t"
15324        $$emit$$"add     0x4,rcx\n\t"
15325        $$emit$$"jl      L_tail\n\t"
15326        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15327        $$emit$$"add     0x20,rax\n\t"
15328        $$emit$$"sub     0x4,rcx\n\t"
15329        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15330        $$emit$$"add     0x4,rcx\n\t"
15331        $$emit$$"jle     L_end\n\t"
15332        $$emit$$"dec     rcx\n\t"
15333        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15334        $$emit$$"vmovq   xmm0,(rax)\n\t"
15335        $$emit$$"add     0x8,rax\n\t"
15336        $$emit$$"dec     rcx\n\t"
15337        $$emit$$"jge     L_sloop\n\t"
15338        $$emit$$"# L_end:\n\t"
15339     } else {
15340        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15341     }
15342     $$emit$$"# DONE"
15343   %}
15344   ins_encode %{
15345     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15346                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15347   %}
15348   ins_pipe(pipe_slow);
15349 %}
15350 
15351 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15352                                  Universe dummy, rFlagsReg cr)
15353 %{
15354   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15355   match(Set dummy (ClearArray (Binary cnt base) val));
15356   ins_cost(125);
15357   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15358 
15359   format %{ $$template
15360     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15361     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15362     $$emit$$"jg      LARGE\n\t"
15363     $$emit$$"dec     rcx\n\t"
15364     $$emit$$"js      DONE\t# Zero length\n\t"
15365     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15366     $$emit$$"dec     rcx\n\t"
15367     $$emit$$"jge     LOOP\n\t"
15368     $$emit$$"jmp     DONE\n\t"
15369     $$emit$$"# LARGE:\n\t"
15370     if (UseFastStosb) {
15371        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15372        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15373     } else if (UseXMMForObjInit) {
15374        $$emit$$"mov     rdi,rax\n\t"
15375        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15376        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15377        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15378        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15379        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15380        $$emit$$"add     0x40,rax\n\t"
15381        $$emit$$"# L_zero_64_bytes:\n\t"
15382        $$emit$$"sub     0x8,rcx\n\t"
15383        $$emit$$"jge     L_loop\n\t"
15384        $$emit$$"add     0x4,rcx\n\t"
15385        $$emit$$"jl      L_tail\n\t"
15386        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15387        $$emit$$"add     0x20,rax\n\t"
15388        $$emit$$"sub     0x4,rcx\n\t"
15389        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15390        $$emit$$"add     0x4,rcx\n\t"
15391        $$emit$$"jle     L_end\n\t"
15392        $$emit$$"dec     rcx\n\t"
15393        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15394        $$emit$$"vmovq   xmm0,(rax)\n\t"
15395        $$emit$$"add     0x8,rax\n\t"
15396        $$emit$$"dec     rcx\n\t"
15397        $$emit$$"jge     L_sloop\n\t"
15398        $$emit$$"# L_end:\n\t"
15399     } else {
15400        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15401     }
15402     $$emit$$"# DONE"
15403   %}
15404   ins_encode %{
15405     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15406                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15407   %}
15408   ins_pipe(pipe_slow);
15409 %}
15410 
15411 // Large non-constant length ClearArray for non-AVX512 targets.
15412 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15413                         Universe dummy, rFlagsReg cr)
15414 %{
15415   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15416   match(Set dummy (ClearArray (Binary cnt base) val));
15417   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15418 
15419   format %{ $$template
15420     if (UseFastStosb) {
15421        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15422        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15423     } else if (UseXMMForObjInit) {
15424        $$emit$$"movdq   $tmp, $val\n\t"
15425        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15426        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15427        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15428        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15429        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15430        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15431        $$emit$$"add     0x40,rax\n\t"
15432        $$emit$$"# L_zero_64_bytes:\n\t"
15433        $$emit$$"sub     0x8,rcx\n\t"
15434        $$emit$$"jge     L_loop\n\t"
15435        $$emit$$"add     0x4,rcx\n\t"
15436        $$emit$$"jl      L_tail\n\t"
15437        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15438        $$emit$$"add     0x20,rax\n\t"
15439        $$emit$$"sub     0x4,rcx\n\t"
15440        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15441        $$emit$$"add     0x4,rcx\n\t"
15442        $$emit$$"jle     L_end\n\t"
15443        $$emit$$"dec     rcx\n\t"
15444        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15445        $$emit$$"vmovq   xmm0,(rax)\n\t"
15446        $$emit$$"add     0x8,rax\n\t"
15447        $$emit$$"dec     rcx\n\t"
15448        $$emit$$"jge     L_sloop\n\t"
15449        $$emit$$"# L_end:\n\t"
15450     } else {
15451        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15452     }
15453   %}
15454   ins_encode %{
15455     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15456                  $tmp$$XMMRegister, true, false);
15457   %}
15458   ins_pipe(pipe_slow);
15459 %}
15460 
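// Large non-constant length ClearArray for non-AVX512 targets, word-copy-only
// variant (note that the rep stosb fast path is not available here).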
15461 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15462                                   Universe dummy, rFlagsReg cr)
15463 %{
15464   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15465   match(Set dummy (ClearArray (Binary cnt base) val));
15466   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15467 
15468   format %{ $$template
15469     if (UseXMMForObjInit) {
15470        $$emit$$"movdq   $tmp, $val\n\t"
15471        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15472        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15473        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15474        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15475        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15476        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15477        $$emit$$"add     0x40,rax\n\t"
15478        $$emit$$"# L_zero_64_bytes:\n\t"
15479        $$emit$$"sub     0x8,rcx\n\t"
15480        $$emit$$"jge     L_loop\n\t"
15481        $$emit$$"add     0x4,rcx\n\t"
15482        $$emit$$"jl      L_tail\n\t"
15483        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15484        $$emit$$"add     0x20,rax\n\t"
15485        $$emit$$"sub     0x4,rcx\n\t"
15486        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15487        $$emit$$"add     0x4,rcx\n\t"
15488        $$emit$$"jle     L_end\n\t"
15489        $$emit$$"dec     rcx\n\t"
15490        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15491        $$emit$$"vmovq   xmm0,(rax)\n\t"
15492        $$emit$$"add     0x8,rax\n\t"
15493        $$emit$$"dec     rcx\n\t"
15494        $$emit$$"jge     L_sloop\n\t"
15495        $$emit$$"# L_end:\n\t"
15496     } else {
15497        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15498     }
15499   %}
15500   ins_encode %{
15501     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15502                  $tmp$$XMMRegister, true, true);
15503   %}
15504   ins_pipe(pipe_slow);
15505 %}
15506 
15507 // Large non-constant length ClearArray for AVX512 targets.
15508 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15509                              Universe dummy, rFlagsReg cr)
15510 %{
15511   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15512   match(Set dummy (ClearArray (Binary cnt base) val));
15513   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15514 
15515   format %{ $$template
15516     if (UseFastStosb) {
15517        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15518        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15519        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15520     } else if (UseXMMForObjInit) {
15521        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15522        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15523        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15524        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15525        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15526        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15527        $$emit$$"add     0x40,rax\n\t"
15528        $$emit$$"# L_zero_64_bytes:\n\t"
15529        $$emit$$"sub     0x8,rcx\n\t"
15530        $$emit$$"jge     L_loop\n\t"
15531        $$emit$$"add     0x4,rcx\n\t"
15532        $$emit$$"jl      L_tail\n\t"
15533        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15534        $$emit$$"add     0x20,rax\n\t"
15535        $$emit$$"sub     0x4,rcx\n\t"
15536        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15537        $$emit$$"add     0x4,rcx\n\t"
15538        $$emit$$"jle     L_end\n\t"
15539        $$emit$$"dec     rcx\n\t"
15540        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15541        $$emit$$"vmovq   xmm0,(rax)\n\t"
15542        $$emit$$"add     0x8,rax\n\t"
15543        $$emit$$"dec     rcx\n\t"
15544        $$emit$$"jge     L_sloop\n\t"
15545        $$emit$$"# L_end:\n\t"
15546     } else {
15547        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15548        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15549     }
15550   %}
15551   ins_encode %{
15552     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15553                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15554   %}
15555   ins_pipe(pipe_slow);
15556 %}
15557 
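// Large non-constant length ClearArray for AVX512 targets, word-copy-only
// variant.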
15558 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15559                                        Universe dummy, rFlagsReg cr)
15560 %{
15561   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15562   match(Set dummy (ClearArray (Binary cnt base) val));
15563   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15564 
15565   format %{ $$template
15566     if (UseFastStosb) {
15567        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15568        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15569        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15570     } else if (UseXMMForObjInit) {
15571        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15572        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15573        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15574        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15575        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15576        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15577        $$emit$$"add     0x40,rax\n\t"
15578        $$emit$$"# L_zero_64_bytes:\n\t"
15579        $$emit$$"sub     0x8,rcx\n\t"
15580        $$emit$$"jge     L_loop\n\t"
15581        $$emit$$"add     0x4,rcx\n\t"
15582        $$emit$$"jl      L_tail\n\t"
15583        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15584        $$emit$$"add     0x20,rax\n\t"
15585        $$emit$$"sub     0x4,rcx\n\t"
15586        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15587        $$emit$$"add     0x4,rcx\n\t"
15588        $$emit$$"jle     L_end\n\t"
15589        $$emit$$"dec     rcx\n\t"
15590        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15591        $$emit$$"vmovq   xmm0,(rax)\n\t"
15592        $$emit$$"add     0x8,rax\n\t"
15593        $$emit$$"dec     rcx\n\t"
15594        $$emit$$"jge     L_sloop\n\t"
15595        $$emit$$"# L_end:\n\t"
15596     } else {
15597        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15598        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15599     }
15600   %}
15601   ins_encode %{
15602     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15603                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15604   %}
15605   ins_pipe(pipe_slow);
15606 %}
15607 
15608 // Small constant length ClearArray for AVX512 targets.
15609 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15610 %{
15611   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15612             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15613   match(Set dummy (ClearArray (Binary cnt base) val));
15614   ins_cost(100);
15615   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15616   format %{ "clear_mem_imm $base, $cnt" %}
15617   ins_encode %{
15618     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15619   %}
15620   ins_pipe(pipe_slow);
15621 %}
15622 
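// String compare instructions come in four flavors selected by
// StrCompNode::encoding(): LL (both strings Latin-1/byte[]), UU (both
// UTF-16/char[]), and the mixed LU/UL forms. Each flavor also has an _evex
// variant, matched when AVX-512 VL+BW is available, which supplies a kReg
// mask temporary where the plain variants pass knoreg.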
15623 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15624                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15625 %{
15626   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15627   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15628   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15629 
15630   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15631   ins_encode %{
15632     __ string_compare($str1$$Register, $str2$$Register,
15633                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15634                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15635   %}
15636   ins_pipe( pipe_slow );
15637 %}
15638 
15639 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15640                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15641 %{
15642   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15643   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15644   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15645 
15646   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15647   ins_encode %{
15648     __ string_compare($str1$$Register, $str2$$Register,
15649                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15650                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15651   %}
15652   ins_pipe( pipe_slow );
15653 %}
15654 
15655 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15656                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15657 %{
15658   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15659   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15660   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15661 
15662   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15663   ins_encode %{
15664     __ string_compare($str1$$Register, $str2$$Register,
15665                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15666                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15667   %}
15668   ins_pipe( pipe_slow );
15669 %}
15670 
15671 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15672                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15673 %{
15674   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15675   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15676   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15677 
15678   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15679   ins_encode %{
15680     __ string_compare($str1$$Register, $str2$$Register,
15681                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15682                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15683   %}
15684   ins_pipe( pipe_slow );
15685 %}
15686 
15687 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15688                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15689 %{
15690   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15691   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15692   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15693 
15694   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15695   ins_encode %{
15696     __ string_compare($str1$$Register, $str2$$Register,
15697                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15698                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15699   %}
15700   ins_pipe( pipe_slow );
15701 %}
15702 
15703 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15704                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15705 %{
15706   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15707   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15708   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15709 
15710   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15711   ins_encode %{
15712     __ string_compare($str1$$Register, $str2$$Register,
15713                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15714                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15715   %}
15716   ins_pipe( pipe_slow );
15717 %}
15718 
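// Note: the UL variants pass their operands to string_compare() swapped
// (str2/cnt2 first); the StrIntrinsicNode::UL encoding tells the stub to
// compensate for the reversed operand order.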
15719 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15720                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15721 %{
15722   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15723   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15724   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15725 
15726   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15727   ins_encode %{
15728     __ string_compare($str2$$Register, $str1$$Register,
15729                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15730                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15731   %}
15732   ins_pipe( pipe_slow );
15733 %}
15734 
15735 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15736                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15737 %{
15738   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15739   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15740   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15741 
15742   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15743   ins_encode %{
15744     __ string_compare($str2$$Register, $str1$$Register,
15745                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15746                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15747   %}
15748   ins_pipe( pipe_slow );
15749 %}
15750 
15751 // fast search of substring with known size.
15752 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15753                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15754 %{
15755   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15756   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15757   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15758 
15759   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15760   ins_encode %{
15761     int icnt2 = (int)$int_cnt2$$constant;
15762     if (icnt2 >= 16) {
15763       // IndexOf for constant substrings with size >= 16 elements
15764       // which don't need to be loaded through the stack.
15765       __ string_indexofC8($str1$$Register, $str2$$Register,
15766                           $cnt1$$Register, $cnt2$$Register,
15767                           icnt2, $result$$Register,
15768                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15769     } else {
15770       // Small strings are loaded through the stack if they cross a page boundary.
15771       __ string_indexof($str1$$Register, $str2$$Register,
15772                         $cnt1$$Register, $cnt2$$Register,
15773                         icnt2, $result$$Register,
15774                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15775     }
15776   %}
15777   ins_pipe( pipe_slow );
15778 %}
15779 
15780 // fast search of substring with known size.
15781 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15782                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15783 %{
15784   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15785   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15786   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15787 
15788   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15789   ins_encode %{
15790     int icnt2 = (int)$int_cnt2$$constant;
15791     if (icnt2 >= 8) {
15792       // IndexOf for constant substrings with size >= 8 elements
15793       // which don't need to be loaded through the stack.
15794       __ string_indexofC8($str1$$Register, $str2$$Register,
15795                           $cnt1$$Register, $cnt2$$Register,
15796                           icnt2, $result$$Register,
15797                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15798     } else {
15799       // Small strings are loaded through the stack if they cross a page boundary.
15800       __ string_indexof($str1$$Register, $str2$$Register,
15801                         $cnt1$$Register, $cnt2$$Register,
15802                         icnt2, $result$$Register,
15803                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15804     }
15805   %}
15806   ins_pipe( pipe_slow );
15807 %}
15808 
15809 // fast search of substring with known size.
15810 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15811                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15812 %{
15813   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15814   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15815   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15816 
15817   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15818   ins_encode %{
15819     int icnt2 = (int)$int_cnt2$$constant;
15820     if (icnt2 >= 8) {
15821       // IndexOf for constant substrings with size >= 8 elements
15822       // which don't need to be loaded through the stack.
15823       __ string_indexofC8($str1$$Register, $str2$$Register,
15824                           $cnt1$$Register, $cnt2$$Register,
15825                           icnt2, $result$$Register,
15826                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15827     } else {
15828       // Small strings are loaded through the stack if they cross a page boundary.
15829       __ string_indexof($str1$$Register, $str2$$Register,
15830                         $cnt1$$Register, $cnt2$$Register,
15831                         icnt2, $result$$Register,
15832                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15833     }
15834   %}
15835   ins_pipe( pipe_slow );
15836 %}
15837 
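// Search of substring with unknown (non-constant) size: cnt2 lives in a
// register and -1 is passed to string_indexof() to mark the substring
// length as not known at compile time.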
15838 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15839                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15840 %{
15841   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15842   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15843   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15844 
15845   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15846   ins_encode %{
15847     __ string_indexof($str1$$Register, $str2$$Register,
15848                       $cnt1$$Register, $cnt2$$Register,
15849                       (-1), $result$$Register,
15850                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15851   %}
15852   ins_pipe( pipe_slow );
15853 %}
15854 
15855 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15856                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15857 %{
15858   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15859   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15860   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15861 
15862   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15863   ins_encode %{
15864     __ string_indexof($str1$$Register, $str2$$Register,
15865                       $cnt1$$Register, $cnt2$$Register,
15866                       (-1), $result$$Register,
15867                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15868   %}
15869   ins_pipe( pipe_slow );
15870 %}
15871 
15872 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15873                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15874 %{
15875   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15876   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15877   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15878 
15879   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15880   ins_encode %{
15881     __ string_indexof($str1$$Register, $str2$$Register,
15882                       $cnt1$$Register, $cnt2$$Register,
15883                       (-1), $result$$Register,
15884                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15885   %}
15886   ins_pipe( pipe_slow );
15887 %}
15888 
15889 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15890                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15891 %{
15892   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15893   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15894   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15895   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15896   ins_encode %{
15897     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15898                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15899   %}
15900   ins_pipe( pipe_slow );
15901 %}
15902 
15903 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15904                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15905 %{
15906   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15907   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15908   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15909   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15910   ins_encode %{
15911     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15912                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15913   %}
15914   ins_pipe( pipe_slow );
15915 %}
15916 
15917 // fast string equals
15918 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15919                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15920 %{
15921   predicate(!VM_Version::supports_avx512vlbw());
15922   match(Set result (StrEquals (Binary str1 str2) cnt));
15923   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15924 
15925   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15926   ins_encode %{
15927     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15928                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15929                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15930   %}
15931   ins_pipe( pipe_slow );
15932 %}
15933 
15934 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15935                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15936 %{
15937   predicate(VM_Version::supports_avx512vlbw());
15938   match(Set result (StrEquals (Binary str1 str2) cnt));
15939   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15940 
15941   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15942   ins_encode %{
15943     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15944                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15945                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15946   %}
15947   ins_pipe( pipe_slow );
15948 %}
15949 
15950 // fast array equals
15951 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15952                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15953 %{
15954   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15955   match(Set result (AryEq ary1 ary2));
15956   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15957 
15958   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15959   ins_encode %{
15960     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15961                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15962                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15963   %}
15964   ins_pipe( pipe_slow );
15965 %}
15966 
15967 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15968                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15969 %{
15970   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15971   match(Set result (AryEq ary1 ary2));
15972   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15973 
15974   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15975   ins_encode %{
15976     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15977                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15978                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15979   %}
15980   ins_pipe( pipe_slow );
15981 %}
15982 
15983 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15984                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15985 %{
15986   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15987   match(Set result (AryEq ary1 ary2));
15988   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15989 
15990   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15991   ins_encode %{
15992     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15993                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15994                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15995   %}
15996   ins_pipe( pipe_slow );
15997 %}
15998 
15999 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16000                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16001 %{
16002   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16003   match(Set result (AryEq ary1 ary2));
16004   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16005 
16006   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16007   ins_encode %{
16008     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16009                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16010                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16011   %}
16012   ins_pipe( pipe_slow );
16013 %}
16014 
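// Vectorized hash code computation over a primitive array. basic_type is an
// immediate BasicType constant selecting the element width; the XMM
// temporaries are all claimed up front for the unrolled vector loop
// (requires AVX2).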
16015 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16016                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16017                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16018                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16019                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16020 %{
16021   predicate(UseAVX >= 2);
16022   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16023   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16024          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16025          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16026          USE basic_type, KILL cr);
16027 
16028   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
16029   ins_encode %{
16030     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16031                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16032                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16033                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16034                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16035                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16036                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16037   %}
16038   ins_pipe( pipe_slow );
16039 %}
16040 
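// Count the number of leading non-negative bytes in ary1: the result is len
// when no byte has its sign bit set, and otherwise the index of (or a lower
// bound on) the first negative byte.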
16041 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
16042                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16043 %{
16044   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16045   match(Set result (CountPositives ary1 len));
16046   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16047 
16048   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16049   ins_encode %{
16050     __ count_positives($ary1$$Register, $len$$Register,
16051                        $result$$Register, $tmp3$$Register,
16052                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16053   %}
16054   ins_pipe( pipe_slow );
16055 %}
16056 
16057 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
16058                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16059 %{
16060   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16061   match(Set result (CountPositives ary1 len));
16062   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16063 
16064   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16065   ins_encode %{
16066     __ count_positives($ary1$$Register, $len$$Register,
16067                        $result$$Register, $tmp3$$Register,
16068                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16069   %}
16070   ins_pipe( pipe_slow );
16071 %}
16072 
16073 // fast char[] to byte[] compression
16074 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16075                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16076   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16077   match(Set result (StrCompressedCopy src (Binary dst len)));
16078   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16079          USE_KILL len, KILL tmp5, KILL cr);
16080 
16081   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16082   ins_encode %{
16083     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16084                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16085                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16086                            knoreg, knoreg);
16087   %}
16088   ins_pipe( pipe_slow );
16089 %}
16090 
16091 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16092                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16093   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16094   match(Set result (StrCompressedCopy src (Binary dst len)));
16095   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16096          USE_KILL len, KILL tmp5, KILL cr);
16097 
16098   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16099   ins_encode %{
16100     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16101                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16102                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16103                            $ktmp1$$KRegister, $ktmp2$$KRegister);
16104   %}
16105   ins_pipe( pipe_slow );
16106 %}

16107 // fast byte[] to char[] inflation
16108 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16109                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16110   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16111   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16112   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16113 
16114   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16115   ins_encode %{
16116     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16117                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16118   %}
16119   ins_pipe( pipe_slow );
16120 %}
16121 
16122 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16123                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16124   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16125   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16126   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16127 
16128   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16129   ins_encode %{
16130     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16131                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16132   %}
16133   ins_pipe( pipe_slow );
16134 %}
16135 
16136 // encode char[] to byte[] in ISO_8859_1
16137 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16138                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16139                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16140   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16141   match(Set result (EncodeISOArray src (Binary dst len)));
16142   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16143 
16144   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
16145   ins_encode %{
16146     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16147                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16148                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16149   %}
16150   ins_pipe( pipe_slow );
16151 %}
16152 
16153 // encode char[] to byte[] in ASCII
16154 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16155                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16156                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16157   predicate(((EncodeISOArrayNode*)n)->is_ascii());
16158   match(Set result (EncodeISOArray src (Binary dst len)));
16159   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16160 
16161   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
16162   ins_encode %{
16163     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16164                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16165                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16166   %}
16167   ins_pipe( pipe_slow );
16168 %}
16169 
16170 //----------Overflow Math Instructions-----------------------------------------
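// These instructions define only the flags register: the overflow flag they
// produce is consumed by a following branch or conditional move. Variants
// whose arithmetic destroys an input (add, neg, mul into rax) mark that
// operand USE_KILL; the compare-based sub checks leave their inputs intact.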
16171 
16172 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16173 %{
16174   match(Set cr (OverflowAddI op1 op2));
16175   effect(DEF cr, USE_KILL op1, USE op2);
16176 
16177   format %{ "addl    $op1, $op2\t# overflow check int" %}
16178 
16179   ins_encode %{
16180     __ addl($op1$$Register, $op2$$Register);
16181   %}
16182   ins_pipe(ialu_reg_reg);
16183 %}
16184 
16185 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16186 %{
16187   match(Set cr (OverflowAddI op1 op2));
16188   effect(DEF cr, USE_KILL op1, USE op2);
16189 
16190   format %{ "addl    $op1, $op2\t# overflow check int" %}
16191 
16192   ins_encode %{
16193     __ addl($op1$$Register, $op2$$constant);
16194   %}
16195   ins_pipe(ialu_reg_reg);
16196 %}
16197 
16198 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16199 %{
16200   match(Set cr (OverflowAddL op1 op2));
16201   effect(DEF cr, USE_KILL op1, USE op2);
16202 
16203   format %{ "addq    $op1, $op2\t# overflow check long" %}
16204   ins_encode %{
16205     __ addq($op1$$Register, $op2$$Register);
16206   %}
16207   ins_pipe(ialu_reg_reg);
16208 %}
16209 
16210 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16211 %{
16212   match(Set cr (OverflowAddL op1 op2));
16213   effect(DEF cr, USE_KILL op1, USE op2);
16214 
16215   format %{ "addq    $op1, $op2\t# overflow check long" %}
16216   ins_encode %{
16217     __ addq($op1$$Register, $op2$$constant);
16218   %}
16219   ins_pipe(ialu_reg_reg);
16220 %}
16221 
16222 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16223 %{
16224   match(Set cr (OverflowSubI op1 op2));
16225 
16226   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16227   ins_encode %{
16228     __ cmpl($op1$$Register, $op2$$Register);
16229   %}
16230   ins_pipe(ialu_reg_reg);
16231 %}
16232 
16233 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16234 %{
16235   match(Set cr (OverflowSubI op1 op2));
16236 
16237   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16238   ins_encode %{
16239     __ cmpl($op1$$Register, $op2$$constant);
16240   %}
16241   ins_pipe(ialu_reg_reg);
16242 %}
16243 
16244 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16245 %{
16246   match(Set cr (OverflowSubL op1 op2));
16247 
16248   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16249   ins_encode %{
16250     __ cmpq($op1$$Register, $op2$$Register);
16251   %}
16252   ins_pipe(ialu_reg_reg);
16253 %}
16254 
16255 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16256 %{
16257   match(Set cr (OverflowSubL op1 op2));
16258 
16259   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16260   ins_encode %{
16261     __ cmpq($op1$$Register, $op2$$constant);
16262   %}
16263   ins_pipe(ialu_reg_reg);
16264 %}
16265 
16266 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16267 %{
16268   match(Set cr (OverflowSubI zero op2));
16269   effect(DEF cr, USE_KILL op2);
16270 
16271   format %{ "negl    $op2\t# overflow check int" %}
16272   ins_encode %{
16273     __ negl($op2$$Register);
16274   %}
16275   ins_pipe(ialu_reg_reg);
16276 %}
16277 
16278 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16279 %{
16280   match(Set cr (OverflowSubL zero op2));
16281   effect(DEF cr, USE_KILL op2);
16282 
16283   format %{ "negq    $op2\t# overflow check long" %}
16284   ins_encode %{
16285     __ negq($op2$$Register);
16286   %}
16287   ins_pipe(ialu_reg_reg);
16288 %}
16289 
16290 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16291 %{
16292   match(Set cr (OverflowMulI op1 op2));
16293   effect(DEF cr, USE_KILL op1, USE op2);
16294 
16295   format %{ "imull    $op1, $op2\t# overflow check int" %}
16296   ins_encode %{
16297     __ imull($op1$$Register, $op2$$Register);
16298   %}
16299   ins_pipe(ialu_reg_reg_alu0);
16300 %}
16301 
16302 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16303 %{
16304   match(Set cr (OverflowMulI op1 op2));
16305   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16306 
16307   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16308   ins_encode %{
16309     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16310   %}
16311   ins_pipe(ialu_reg_reg_alu0);
16312 %}
16313 
16314 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16315 %{
16316   match(Set cr (OverflowMulL op1 op2));
16317   effect(DEF cr, USE_KILL op1, USE op2);
16318 
16319   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16320   ins_encode %{
16321     __ imulq($op1$$Register, $op2$$Register);
16322   %}
16323   ins_pipe(ialu_reg_reg_alu0);
16324 %}
16325 
16326 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16327 %{
16328   match(Set cr (OverflowMulL op1 op2));
16329   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16330 
16331   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16332   ins_encode %{
16333     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16334   %}
16335   ins_pipe(ialu_reg_reg_alu0);
16336 %}
16337 
16338 
16339 //----------Control Flow Instructions------------------------------------------
16340 // Signed compare Instructions
16341 
16342 // XXX more variants!!
16343 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16344 %{
16345   match(Set cr (CmpI op1 op2));
16346   effect(DEF cr, USE op1, USE op2);
16347 
16348   format %{ "cmpl    $op1, $op2" %}
16349   ins_encode %{
16350     __ cmpl($op1$$Register, $op2$$Register);
16351   %}
16352   ins_pipe(ialu_cr_reg_reg);
16353 %}
16354 
16355 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16356 %{
16357   match(Set cr (CmpI op1 op2));
16358 
16359   format %{ "cmpl    $op1, $op2" %}
16360   ins_encode %{
16361     __ cmpl($op1$$Register, $op2$$constant);
16362   %}
16363   ins_pipe(ialu_cr_reg_imm);
16364 %}
16365 
16366 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16367 %{
16368   match(Set cr (CmpI op1 (LoadI op2)));
16369 
16370   ins_cost(500); // XXX
16371   format %{ "cmpl    $op1, $op2" %}
16372   ins_encode %{
16373     __ cmpl($op1$$Register, $op2$$Address);
16374   %}
16375   ins_pipe(ialu_cr_reg_mem);
16376 %}
16377 
16378 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16379 %{
16380   match(Set cr (CmpI src zero));
16381 
16382   format %{ "testl   $src, $src" %}
16383   ins_encode %{
16384     __ testl($src$$Register, $src$$Register);
16385   %}
16386   ins_pipe(ialu_cr_reg_imm);
16387 %}
16388 
16389 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16390 %{
16391   match(Set cr (CmpI (AndI src con) zero));
16392 
16393   format %{ "testl   $src, $con" %}
16394   ins_encode %{
16395     __ testl($src$$Register, $con$$constant);
16396   %}
16397   ins_pipe(ialu_cr_reg_imm);
16398 %}
16399 
16400 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16401 %{
16402   match(Set cr (CmpI (AndI src1 src2) zero));
16403 
16404   format %{ "testl   $src1, $src2" %}
16405   ins_encode %{
16406     __ testl($src1$$Register, $src2$$Register);
16407   %}
16408   ins_pipe(ialu_cr_reg_imm);
16409 %}
16410 
16411 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16412 %{
16413   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16414 
16415   format %{ "testl   $src, $mem" %}
16416   ins_encode %{
16417     __ testl($src$$Register, $mem$$Address);
16418   %}
16419   ins_pipe(ialu_cr_reg_mem);
16420 %}
16421 
16422 // Unsigned compare Instructions; really, same as signed except they
16423 // produce an rFlagsRegU instead of rFlagsReg.
16424 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16425 %{
16426   match(Set cr (CmpU op1 op2));
16427 
16428   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16429   ins_encode %{
16430     __ cmpl($op1$$Register, $op2$$Register);
16431   %}
16432   ins_pipe(ialu_cr_reg_reg);
16433 %}
16434 
16435 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16436 %{
16437   match(Set cr (CmpU op1 op2));
16438 
16439   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16440   ins_encode %{
16441     __ cmpl($op1$$Register, $op2$$constant);
16442   %}
16443   ins_pipe(ialu_cr_reg_imm);
16444 %}
16445 
16446 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16447 %{
16448   match(Set cr (CmpU op1 (LoadI op2)));
16449 
16450   ins_cost(500); // XXX
16451   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16452   ins_encode %{
16453     __ cmpl($op1$$Register, $op2$$Address);
16454   %}
16455   ins_pipe(ialu_cr_reg_mem);
16456 %}
16457 
16458 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16459 %{
16460   match(Set cr (CmpU src zero));
16461 
16462   format %{ "testl   $src, $src\t# unsigned" %}
16463   ins_encode %{
16464     __ testl($src$$Register, $src$$Register);
16465   %}
16466   ins_pipe(ialu_cr_reg_imm);
16467 %}
16468 
16469 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16470 %{
16471   match(Set cr (CmpP op1 op2));
16472 
16473   format %{ "cmpq    $op1, $op2\t# ptr" %}
16474   ins_encode %{
16475     __ cmpq($op1$$Register, $op2$$Register);
16476   %}
16477   ins_pipe(ialu_cr_reg_reg);
16478 %}
16479 
16480 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16481 %{
16482   match(Set cr (CmpP op1 (LoadP op2)));
16483   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16484 
16485   ins_cost(500); // XXX
16486   format %{ "cmpq    $op1, $op2\t# ptr" %}
16487   ins_encode %{
16488     __ cmpq($op1$$Register, $op2$$Address);
16489   %}
16490   ins_pipe(ialu_cr_reg_mem);
16491 %}
16492 
16493 // XXX this is generalized by compP_rReg_mem???
16494 // Compare raw pointer (used in out-of-heap check).
16495 // Only works because non-oop pointers must be raw pointers
16496 // and raw pointers have no anti-dependencies.
16497 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16498 %{
16499   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16500             n->in(2)->as_Load()->barrier_data() == 0);
16501   match(Set cr (CmpP op1 (LoadP op2)));
16502 
16503   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16504   ins_encode %{
16505     __ cmpq($op1$$Register, $op2$$Address);
16506   %}
16507   ins_pipe(ialu_cr_reg_mem);
16508 %}
16509 
16510 // This will generate a signed flags result. This should be OK since
16511 // any compare to zero should be eq/neq.
16512 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16513 %{
16514   match(Set cr (CmpP src zero));
16515 
16516   format %{ "testq   $src, $src\t# ptr" %}
16517   ins_encode %{
16518     __ testq($src$$Register, $src$$Register);
16519   %}
16520   ins_pipe(ialu_cr_reg_imm);
16521 %}
16522 
16523 // This will generate a signed flags result. This should be OK since
16524 // any compare to zero should be eq/neq.
16525 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16526 %{
16527   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16528             n->in(1)->as_Load()->barrier_data() == 0);
16529   match(Set cr (CmpP (LoadP op) zero));
16530 
16531   ins_cost(500); // XXX
16532   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16533   ins_encode %{
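    // The 32-bit immediate sign-extends to 0xFFFFFFFFFFFFFFFF, so this ANDs
    // all 64 bits of the pointer and sets ZF iff the pointer is null.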
16534     __ testq($op$$Address, 0xFFFFFFFF);
16535   %}
16536   ins_pipe(ialu_cr_reg_imm);
16537 %}
16538 
16539 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16540 %{
16541   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16542             n->in(1)->as_Load()->barrier_data() == 0);
16543   match(Set cr (CmpP (LoadP mem) zero));
16544 
16545   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16546   ins_encode %{
16547     __ cmpq(r12, $mem$$Address);
16548   %}
16549   ins_pipe(ialu_cr_reg_mem);
16550 %}
16551 
16552 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16553 %{
16554   match(Set cr (CmpN op1 op2));
16555 
16556   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16557   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16558   ins_pipe(ialu_cr_reg_reg);
16559 %}
16560 
16561 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16562 %{
16563   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16564   match(Set cr (CmpN src (LoadN mem)));
16565 
16566   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16567   ins_encode %{
16568     __ cmpl($src$$Register, $mem$$Address);
16569   %}
16570   ins_pipe(ialu_cr_reg_mem);
16571 %}
16572 
16573 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16574   match(Set cr (CmpN op1 op2));
16575 
16576   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16577   ins_encode %{
16578     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16579   %}
16580   ins_pipe(ialu_cr_reg_imm);
16581 %}
16582 
16583 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16584 %{
16585   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16586   match(Set cr (CmpN src (LoadN mem)));
16587 
16588   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16589   ins_encode %{
16590     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16591   %}
16592   ins_pipe(ialu_cr_reg_mem);
16593 %}
16594 
16595 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16596   match(Set cr (CmpN op1 op2));
16597 
16598   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16599   ins_encode %{
16600     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16601   %}
16602   ins_pipe(ialu_cr_reg_imm);
16603 %}
16604 
16605 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16606 %{
16607   predicate(!UseCompactObjectHeaders);
16608   match(Set cr (CmpN src (LoadNKlass mem)));
16609 
16610   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16611   ins_encode %{
16612     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16613   %}
16614   ins_pipe(ialu_cr_reg_mem);
16615 %}
16616 
16617 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16618   match(Set cr (CmpN src zero));
16619 
16620   format %{ "testl   $src, $src\t# compressed ptr" %}
16621   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16622   ins_pipe(ialu_cr_reg_imm);
16623 %}
16624 
16625 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16626 %{
16627   predicate(CompressedOops::base() != nullptr &&
16628             n->in(1)->as_Load()->barrier_data() == 0);
16629   match(Set cr (CmpN (LoadN mem) zero));
16630 
16631   ins_cost(500); // XXX
16632   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16633   ins_encode %{
16634     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16635   %}
16636   ins_pipe(ialu_cr_reg_mem);
16637 %}
16638 
16639 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16640 %{
16641   predicate(CompressedOops::base() == nullptr &&
16642             n->in(1)->as_Load()->barrier_data() == 0);
16643   match(Set cr (CmpN (LoadN mem) zero));
16644 
16645   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16646   ins_encode %{
16647     __ cmpl(r12, $mem$$Address);
16648   %}
16649   ins_pipe(ialu_cr_reg_mem);
16650 %}
16651 
16652 // Yanked all unsigned pointer compare operations.
16653 // Pointer compares are done with CmpP which is already unsigned.
16654 
16655 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16656 %{
16657   match(Set cr (CmpL op1 op2));
16658 
16659   format %{ "cmpq    $op1, $op2" %}
16660   ins_encode %{
16661     __ cmpq($op1$$Register, $op2$$Register);
16662   %}
16663   ins_pipe(ialu_cr_reg_reg);
16664 %}
16665 
16666 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16667 %{
16668   match(Set cr (CmpL op1 op2));
16669 
16670   format %{ "cmpq    $op1, $op2" %}
16671   ins_encode %{
16672     __ cmpq($op1$$Register, $op2$$constant);
16673   %}
16674   ins_pipe(ialu_cr_reg_imm);
16675 %}
16676 
16677 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16678 %{
16679   match(Set cr (CmpL op1 (LoadL op2)));
16680 
16681   format %{ "cmpq    $op1, $op2" %}
16682   ins_encode %{
16683     __ cmpq($op1$$Register, $op2$$Address);
16684   %}
16685   ins_pipe(ialu_cr_reg_mem);
16686 %}
16687 
16688 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16689 %{
16690   match(Set cr (CmpL src zero));
16691 
16692   format %{ "testq   $src, $src" %}
16693   ins_encode %{
16694     __ testq($src$$Register, $src$$Register);
16695   %}
16696   ins_pipe(ialu_cr_reg_imm);
16697 %}
16698 
16699 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16700 %{
16701   match(Set cr (CmpL (AndL src con) zero));
16702 
16703   format %{ "testq   $src, $con\t# long" %}
16704   ins_encode %{
16705     __ testq($src$$Register, $con$$constant);
16706   %}
16707   ins_pipe(ialu_cr_reg_imm);
16708 %}
16709 
16710 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16711 %{
16712   match(Set cr (CmpL (AndL src1 src2) zero));
16713 
16714   format %{ "testq   $src1, $src2\t# long" %}
16715   ins_encode %{
16716     __ testq($src1$$Register, $src2$$Register);
16717   %}
16718   ins_pipe(ialu_cr_reg_imm);
16719 %}
16720 
16721 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16722 %{
16723   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16724 
16725   format %{ "testq   $src, $mem" %}
16726   ins_encode %{
16727     __ testq($src$$Register, $mem$$Address);
16728   %}
16729   ins_pipe(ialu_cr_reg_mem);
16730 %}
16731 
16732 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16733 %{
16734   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16735 
16736   format %{ "testq   $src, $mem" %}
16737   ins_encode %{
16738     __ testq($src$$Register, $mem$$Address);
16739   %}
16740   ins_pipe(ialu_cr_reg_mem);
16741 %}
16742 
16743 // Manifest a CmpU result in an integer register.  Very painful.
16744 // This is the test to avoid.
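// The expansion computes dst = (src1 < src2) ? -1 : (src1 == src2) ? 0 : 1,
// here with an unsigned compare: dst is preloaded with -1 and kept on "below",
// otherwise setcc(notZero) leaves 0 for equal and 1 for above.  The signed
// (CmpL3) and unsigned-long (CmpUL3) versions below follow the same pattern.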
16745 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16746 %{
16747   match(Set dst (CmpU3 src1 src2));
16748   effect(KILL flags);
16749 
16750   ins_cost(275); // XXX
16751   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16752             "movl    $dst, -1\n\t"
16753             "jb,u    done\n\t"
16754             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16755     "done:" %}
16756   ins_encode %{
16757     Label done;
16758     __ cmpl($src1$$Register, $src2$$Register);
16759     __ movl($dst$$Register, -1);
16760     __ jccb(Assembler::below, done);
16761     __ setcc(Assembler::notZero, $dst$$Register);
16762     __ bind(done);
16763   %}
16764   ins_pipe(pipe_slow);
16765 %}
16766 
16767 // Manifest a CmpL result in an integer register.  Very painful.
16768 // This is the test to avoid.
16769 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16770 %{
16771   match(Set dst (CmpL3 src1 src2));
16772   effect(KILL flags);
16773 
16774   ins_cost(275); // XXX
16775   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16776             "movl    $dst, -1\n\t"
16777             "jl,s    done\n\t"
16778             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16779     "done:" %}
16780   ins_encode %{
16781     Label done;
16782     __ cmpq($src1$$Register, $src2$$Register);
16783     __ movl($dst$$Register, -1);
16784     __ jccb(Assembler::less, done);
16785     __ setcc(Assembler::notZero, $dst$$Register);
16786     __ bind(done);
16787   %}
16788   ins_pipe(pipe_slow);
16789 %}
16790 
16791 // Manifest a CmpUL result in an integer register.  Very painful.
16792 // This is the test to avoid.
16793 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16794 %{
16795   match(Set dst (CmpUL3 src1 src2));
16796   effect(KILL flags);
16797 
16798   ins_cost(275); // XXX
16799   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16800             "movl    $dst, -1\n\t"
16801             "jb,u    done\n\t"
16802             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16803     "done:" %}
16804   ins_encode %{
16805     Label done;
16806     __ cmpq($src1$$Register, $src2$$Register);
16807     __ movl($dst$$Register, -1);
16808     __ jccb(Assembler::below, done);
16809     __ setcc(Assembler::notZero, $dst$$Register);
16810     __ bind(done);
16811   %}
16812   ins_pipe(pipe_slow);
16813 %}
16814 
// Unsigned long compare instructions; really the same as the signed long
// compares above, except that they produce an rFlagsRegU instead of rFlagsReg.
16817 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16818 %{
16819   match(Set cr (CmpUL op1 op2));
16820 
16821   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16822   ins_encode %{
16823     __ cmpq($op1$$Register, $op2$$Register);
16824   %}
16825   ins_pipe(ialu_cr_reg_reg);
16826 %}
16827 
16828 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16829 %{
16830   match(Set cr (CmpUL op1 op2));
16831 
16832   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16833   ins_encode %{
16834     __ cmpq($op1$$Register, $op2$$constant);
16835   %}
16836   ins_pipe(ialu_cr_reg_imm);
16837 %}
16838 
16839 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16840 %{
16841   match(Set cr (CmpUL op1 (LoadL op2)));
16842 
16843   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16844   ins_encode %{
16845     __ cmpq($op1$$Register, $op2$$Address);
16846   %}
16847   ins_pipe(ialu_cr_reg_mem);
16848 %}
16849 
16850 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16851 %{
16852   match(Set cr (CmpUL src zero));
16853 
16854   format %{ "testq   $src, $src\t# unsigned" %}
16855   ins_encode %{
16856     __ testq($src$$Register, $src$$Register);
16857   %}
16858   ins_pipe(ialu_cr_reg_imm);
16859 %}
16860 
16861 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16862 %{
16863   match(Set cr (CmpI (LoadB mem) imm));
16864 
16865   ins_cost(125);
16866   format %{ "cmpb    $mem, $imm" %}
16867   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16868   ins_pipe(ialu_cr_reg_mem);
16869 %}
16870 
16871 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16872 %{
16873   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16874 
16875   ins_cost(125);
16876   format %{ "testb   $mem, $imm\t# ubyte" %}
16877   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16878   ins_pipe(ialu_cr_reg_mem);
16879 %}
16880 
16881 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16882 %{
16883   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16884 
16885   ins_cost(125);
16886   format %{ "testb   $mem, $imm\t# byte" %}
16887   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16888   ins_pipe(ialu_cr_reg_mem);
16889 %}
16890 
16891 //----------Max and Min--------------------------------------------------------
16892 // Min Instructions
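// MinI expands into a compare plus a conditional move, dst = (dst > src) ? src : dst:
// the cmovI_reg_g helper below copies src into dst when the compare answered
// "greater", which yields the minimum.  MaxI is the mirror image using "less".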
16893 
16894 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16895 %{
16896   predicate(!UseAPX);
16897   effect(USE_DEF dst, USE src, USE cr);
16898 
16899   format %{ "cmovlgt $dst, $src\t# min" %}
16900   ins_encode %{
16901     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16902   %}
16903   ins_pipe(pipe_cmov_reg);
16904 %}
16905 
16906 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16907 %{
16908   predicate(UseAPX);
16909   effect(DEF dst, USE src1, USE src2, USE cr);
16910 
16911   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16912   ins_encode %{
16913     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16914   %}
16915   ins_pipe(pipe_cmov_reg);
16916 %}
16917 
16918 instruct minI_rReg(rRegI dst, rRegI src)
16919 %{
16920   predicate(!UseAPX);
16921   match(Set dst (MinI dst src));
16922 
16923   ins_cost(200);
16924   expand %{
16925     rFlagsReg cr;
16926     compI_rReg(cr, dst, src);
16927     cmovI_reg_g(dst, src, cr);
16928   %}
16929 %}
16930 
16931 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16932 %{
16933   predicate(UseAPX);
16934   match(Set dst (MinI src1 src2));
16935   effect(DEF dst, USE src1, USE src2);
16936   flag(PD::Flag_ndd_demotable_opr1);
16937 
16938   ins_cost(200);
16939   expand %{
16940     rFlagsReg cr;
16941     compI_rReg(cr, src1, src2);
16942     cmovI_reg_g_ndd(dst, src1, src2, cr);
16943   %}
16944 %}
16945 
16946 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16947 %{
16948   predicate(!UseAPX);
16949   effect(USE_DEF dst, USE src, USE cr);
16950 
16951   format %{ "cmovllt $dst, $src\t# max" %}
16952   ins_encode %{
16953     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16954   %}
16955   ins_pipe(pipe_cmov_reg);
16956 %}
16957 
16958 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16959 %{
16960   predicate(UseAPX);
16961   effect(DEF dst, USE src1, USE src2, USE cr);
16962 
16963   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16964   ins_encode %{
16965     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16966   %}
16967   ins_pipe(pipe_cmov_reg);
16968 %}
16969 
16970 instruct maxI_rReg(rRegI dst, rRegI src)
16971 %{
16972   predicate(!UseAPX);
16973   match(Set dst (MaxI dst src));
16974 
16975   ins_cost(200);
16976   expand %{
16977     rFlagsReg cr;
16978     compI_rReg(cr, dst, src);
16979     cmovI_reg_l(dst, src, cr);
16980   %}
16981 %}
16982 
16983 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16984 %{
16985   predicate(UseAPX);
16986   match(Set dst (MaxI src1 src2));
16987   effect(DEF dst, USE src1, USE src2);
16988   flag(PD::Flag_ndd_demotable_opr1);
16989 
16990   ins_cost(200);
16991   expand %{
16992     rFlagsReg cr;
16993     compI_rReg(cr, src1, src2);
16994     cmovI_reg_l_ndd(dst, src1, src2, cr);
16995   %}
16996 %}
16997 
16998 // ============================================================================
16999 // Branch Instructions
17000 
17001 // Jump Direct - Label defines a relative address from JMP+1
17002 instruct jmpDir(label labl)
17003 %{
17004   match(Goto);
17005   effect(USE labl);
17006 
17007   ins_cost(300);
17008   format %{ "jmp     $labl" %}
17009   size(5);
17010   ins_encode %{
17011     Label* L = $labl$$label;
17012     __ jmp(*L, false); // Always long jump
17013   %}
17014   ins_pipe(pipe_jmp);
17015 %}
17016 
17017 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17018 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17019 %{
17020   match(If cop cr);
17021   effect(USE labl);
17022 
17023   ins_cost(300);
17024   format %{ "j$cop     $labl" %}
17025   size(6);
17026   ins_encode %{
17027     Label* L = $labl$$label;
17028     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17029   %}
17030   ins_pipe(pipe_jcc);
17031 %}
17032 
17033 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17034 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17035 %{
17036   match(CountedLoopEnd cop cr);
17037   effect(USE labl);
17038 
17039   ins_cost(300);
17040   format %{ "j$cop     $labl\t# loop end" %}
17041   size(6);
17042   ins_encode %{
17043     Label* L = $labl$$label;
17044     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17045   %}
17046   ins_pipe(pipe_jcc);
17047 %}
17048 
17049 // Jump Direct Conditional - using unsigned comparison
17050 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17051   match(If cop cmp);
17052   effect(USE labl);
17053 
17054   ins_cost(300);
17055   format %{ "j$cop,u   $labl" %}
17056   size(6);
17057   ins_encode %{
17058     Label* L = $labl$$label;
17059     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17060   %}
17061   ins_pipe(pipe_jcc);
17062 %}
17063 
17064 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17065   match(If cop cmp);
17066   effect(USE labl);
17067 
17068   ins_cost(200);
17069   format %{ "j$cop,u   $labl" %}
17070   size(6);
17071   ins_encode %{
17072     Label* L = $labl$$label;
17073     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17074   %}
17075   ins_pipe(pipe_jcc);
17076 %}
17077 
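// Unordered floating-point compares set the parity flag.  For not-equal a NaN
// operand must take the branch, so jp jumps straight to the label; for equal a
// NaN must fall through, so jp skips over the final je.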
17078 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17079   match(If cop cmp);
17080   effect(USE labl);
17081 
17082   ins_cost(200);
17083   format %{ $$template
17084     if ($cop$$cmpcode == Assembler::notEqual) {
17085       $$emit$$"jp,u    $labl\n\t"
17086       $$emit$$"j$cop,u   $labl"
17087     } else {
17088       $$emit$$"jp,u    done\n\t"
17089       $$emit$$"j$cop,u   $labl\n\t"
17090       $$emit$$"done:"
17091     }
17092   %}
17093   ins_encode %{
17094     Label* l = $labl$$label;
17095     if ($cop$$cmpcode == Assembler::notEqual) {
17096       __ jcc(Assembler::parity, *l, false);
17097       __ jcc(Assembler::notEqual, *l, false);
17098     } else if ($cop$$cmpcode == Assembler::equal) {
17099       Label done;
17100       __ jccb(Assembler::parity, done);
17101       __ jcc(Assembler::equal, *l, false);
17102       __ bind(done);
17103     } else {
17104        ShouldNotReachHere();
17105     }
17106   %}
17107   ins_pipe(pipe_jcc);
17108 %}
17109 
17110 // ============================================================================
17111 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
17112 // superklass array for an instance of the superklass.  Set a hidden
17113 // internal cache on a hit (cache is checked with exposed code in
17114 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
17115 // encoding ALSO sets flags.
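// For example (illustrative), a test like "x instanceof SomeInterface" where
// SomeInterface is not among x's primary supers falls through the fast checks
// in gen_subtype_check() and lands here to scan the secondary supers array.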
17116 
17117 instruct partialSubtypeCheck(rdi_RegP result,
17118                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17119                              rFlagsReg cr)
17120 %{
17121   match(Set result (PartialSubtypeCheck sub super));
17122   predicate(!UseSecondarySupersTable);
17123   effect(KILL rcx, KILL cr);
17124 
17125   ins_cost(1100);  // slightly larger than the next version
17126   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17127             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17128             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17129             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17130             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
17131             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17132             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
17133     "miss:\t" %}
17134 
17135   ins_encode %{
17136     Label miss;
17137     // NB: Callers may assume that, when $result is a valid register,
17138     // check_klass_subtype_slow_path_linear sets it to a nonzero
17139     // value.
17140     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17141                                             $rcx$$Register, $result$$Register,
17142                                             nullptr, &miss,
17143                                             /*set_cond_codes:*/ true);
17144     __ xorptr($result$$Register, $result$$Register);
17145     __ bind(miss);
17146   %}
17147 
17148   ins_pipe(pipe_slow);
17149 %}
17150 
17151 // ============================================================================
17152 // Two versions of hashtable-based partialSubtypeCheck, both used when
17153 // we need to search for a super class in the secondary supers array.
17154 // The first is used when we don't know _a priori_ the class being
17155 // searched for. The second, far more common, is used when we do know:
17156 // this is used for instanceof, checkcast, and any case where C2 can
17157 // determine it by constant propagation.
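// Illustrative: "obj instanceof java.util.List" hands C2 a constant superklass
// and matches the constant version; a reflective Class.isInstance(obj) on a
// non-constant Class typically matches the variable version.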
17158 
17159 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17160                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17161                                        rFlagsReg cr)
17162 %{
17163   match(Set result (PartialSubtypeCheck sub super));
17164   predicate(UseSecondarySupersTable);
17165   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17166 
17167   ins_cost(1000);
17168   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17169 
17170   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17173   %}
17174 
17175   ins_pipe(pipe_slow);
17176 %}
17177 
17178 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17179                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17180                                        rFlagsReg cr)
17181 %{
17182   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17183   predicate(UseSecondarySupersTable);
17184   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17185 
  ins_cost(700);  // smaller than the previous version
17187   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17188 
17189   ins_encode %{
17190     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17191     if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
17195     } else {
17196       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17197     }
17198   %}
17199 
17200   ins_pipe(pipe_slow);
17201 %}
17202 
17203 // ============================================================================
17204 // Branch Instructions -- short offset versions
17205 //
17206 // These instructions are used to replace jumps of a long offset (the default
17207 // match) with jumps of a shorter offset.  These instructions are all tagged
17208 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17209 // match rules in general matching.  Instead, the ADLC generates a conversion
17210 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short form's offset is reachable via the is_short_branch_offset() predicate
// in the machine-specific code section of the file.
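// For example, jmpDir emits the 5-byte E9 rel32 encoding (size(5)) while
// jmpDir_short below emits the 2-byte EB rel8 encoding (size(2)); conditional
// jumps shrink from 6 bytes (0F 8x rel32) to 2 bytes (7x rel8).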
17214 
17215 // Jump Direct - Label defines a relative address from JMP+1
17216 instruct jmpDir_short(label labl) %{
17217   match(Goto);
17218   effect(USE labl);
17219 
17220   ins_cost(300);
17221   format %{ "jmp,s   $labl" %}
17222   size(2);
17223   ins_encode %{
17224     Label* L = $labl$$label;
17225     __ jmpb(*L);
17226   %}
17227   ins_pipe(pipe_jmp);
17228   ins_short_branch(1);
17229 %}
17230 
17231 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17232 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17233   match(If cop cr);
17234   effect(USE labl);
17235 
17236   ins_cost(300);
17237   format %{ "j$cop,s   $labl" %}
17238   size(2);
17239   ins_encode %{
17240     Label* L = $labl$$label;
17241     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17242   %}
17243   ins_pipe(pipe_jcc);
17244   ins_short_branch(1);
17245 %}
17246 
17247 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17248 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17249   match(CountedLoopEnd cop cr);
17250   effect(USE labl);
17251 
17252   ins_cost(300);
17253   format %{ "j$cop,s   $labl\t# loop end" %}
17254   size(2);
17255   ins_encode %{
17256     Label* L = $labl$$label;
17257     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17258   %}
17259   ins_pipe(pipe_jcc);
17260   ins_short_branch(1);
17261 %}
17262 
17263 // Jump Direct Conditional - using unsigned comparison
17264 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17265   match(If cop cmp);
17266   effect(USE labl);
17267 
17268   ins_cost(300);
17269   format %{ "j$cop,us  $labl" %}
17270   size(2);
17271   ins_encode %{
17272     Label* L = $labl$$label;
17273     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17274   %}
17275   ins_pipe(pipe_jcc);
17276   ins_short_branch(1);
17277 %}
17278 
17279 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17280   match(If cop cmp);
17281   effect(USE labl);
17282 
17283   ins_cost(300);
17284   format %{ "j$cop,us  $labl" %}
17285   size(2);
17286   ins_encode %{
17287     Label* L = $labl$$label;
17288     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17289   %}
17290   ins_pipe(pipe_jcc);
17291   ins_short_branch(1);
17292 %}
17293 
17294 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17295   match(If cop cmp);
17296   effect(USE labl);
17297 
17298   ins_cost(300);
17299   format %{ $$template
17300     if ($cop$$cmpcode == Assembler::notEqual) {
17301       $$emit$$"jp,u,s  $labl\n\t"
17302       $$emit$$"j$cop,u,s  $labl"
17303     } else {
17304       $$emit$$"jp,u,s  done\n\t"
17305       $$emit$$"j$cop,u,s  $labl\n\t"
17306       $$emit$$"done:"
17307     }
17308   %}
17309   size(4);
17310   ins_encode %{
17311     Label* l = $labl$$label;
17312     if ($cop$$cmpcode == Assembler::notEqual) {
17313       __ jccb(Assembler::parity, *l);
17314       __ jccb(Assembler::notEqual, *l);
17315     } else if ($cop$$cmpcode == Assembler::equal) {
17316       Label done;
17317       __ jccb(Assembler::parity, done);
17318       __ jccb(Assembler::equal, *l);
17319       __ bind(done);
17320     } else {
17321        ShouldNotReachHere();
17322     }
17323   %}
17324   ins_pipe(pipe_jcc);
17325   ins_short_branch(1);
17326 %}
17327 
17328 // ============================================================================
17329 // inlined locking and unlocking
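// Both rules report their outcome through the flags: the masm fast paths leave
// ZF set on success and cleared when the slow-path runtime call is required,
// so the matched If node branches on equal/not-equal.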
17330 
17331 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17332   match(Set cr (FastLock object box));
17333   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17334   ins_cost(300);
17335   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17336   ins_encode %{
17337     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17338   %}
17339   ins_pipe(pipe_slow);
17340 %}
17341 
17342 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17343   match(Set cr (FastUnlock object rax_reg));
17344   effect(TEMP tmp, USE_KILL rax_reg);
17345   ins_cost(300);
17346   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17347   ins_encode %{
17348     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17349   %}
17350   ins_pipe(pipe_slow);
17351 %}
17352 
17353 
17354 // ============================================================================
17355 // Safepoint Instructions
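// The poll operand holds the thread-local polling page address.  While the
// page is readable the testl below is a harmless load; when a safepoint or
// handshake is pending the page is protected, the load faults, and the signal
// handler dispatches to the safepoint code (hence the poll_type relocation).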
17356 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17357 %{
17358   match(SafePoint poll);
17359   effect(KILL cr, USE poll);
17360 
17361   format %{ "testl   rax, [$poll]\t"
17362             "# Safepoint: poll for GC" %}
17363   ins_cost(125);
17364   ins_encode %{
17365     __ relocate(relocInfo::poll_type);
17366     address pre_pc = __ pc();
17367     __ testl(rax, Address($poll$$Register, 0));
17368     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17369   %}
17370   ins_pipe(ialu_reg_mem);
17371 %}
17372 
17373 instruct mask_all_evexL(kReg dst, rRegL src) %{
17374   match(Set dst (MaskAll src));
17375   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17376   ins_encode %{
17377     int mask_len = Matcher::vector_length(this);
17378     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17379   %}
17380   ins_pipe( pipe_slow );
17381 %}
17382 
17383 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17384   predicate(Matcher::vector_length(n) > 32);
17385   match(Set dst (MaskAll src));
17386   effect(TEMP tmp);
17387   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17388   ins_encode %{
17389     int mask_len = Matcher::vector_length(this);
17390     __ movslq($tmp$$Register, $src$$Register);
17391     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17392   %}
17393   ins_pipe( pipe_slow );
17394 %}
17395 
17396 // ============================================================================
17397 // Procedure Call/Return Instructions
17398 // Call Java Static Instruction
17399 // Note: If this code changes, the corresponding ret_addr_offset() and
17400 //       compute_padding() functions will have to be adjusted.
17401 instruct CallStaticJavaDirect(method meth) %{
17402   match(CallStaticJava);
17403   effect(USE meth);
17404 
17405   ins_cost(300);
17406   format %{ "call,static " %}
17407   opcode(0xE8); /* E8 cd */
17408   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17409   ins_pipe(pipe_slow);
17410   ins_alignment(4);
17411 %}
17412 
17413 // Call Java Dynamic Instruction
17414 // Note: If this code changes, the corresponding ret_addr_offset() and
17415 //       compute_padding() functions will have to be adjusted.
17416 instruct CallDynamicJavaDirect(method meth)
17417 %{
17418   match(CallDynamicJava);
17419   effect(USE meth);
17420 
17421   ins_cost(300);
17422   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17423             "call,dynamic " %}
17424   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17425   ins_pipe(pipe_slow);
17426   ins_alignment(4);
17427 %}
17428 
17429 // Call Runtime Instruction
17430 instruct CallRuntimeDirect(method meth)
17431 %{
17432   match(CallRuntime);
17433   effect(USE meth);
17434 
17435   ins_cost(300);
17436   format %{ "call,runtime " %}
17437   ins_encode(clear_avx, Java_To_Runtime(meth));
17438   ins_pipe(pipe_slow);
17439 %}
17440 
17441 // Call runtime without safepoint
17442 instruct CallLeafDirect(method meth)
17443 %{
17444   match(CallLeaf);
17445   effect(USE meth);
17446 
17447   ins_cost(300);
17448   format %{ "call_leaf,runtime " %}
17449   ins_encode(clear_avx, Java_To_Runtime(meth));
17450   ins_pipe(pipe_slow);
17451 %}
17452 
17453 // Call runtime without safepoint and with vector arguments
17454 instruct CallLeafDirectVector(method meth)
17455 %{
17456   match(CallLeafVector);
17457   effect(USE meth);
17458 
17459   ins_cost(300);
17460   format %{ "call_leaf,vector " %}
17461   ins_encode(Java_To_Runtime(meth));
17462   ins_pipe(pipe_slow);
17463 %}
17464 
17465 // Call runtime without safepoint
17466 // entry point is null, target holds the address to call
17467 instruct CallLeafNoFPInDirect(rRegP target)
17468 %{
17469   predicate(n->as_Call()->entry_point() == nullptr);
17470   match(CallLeafNoFP target);
17471 
17472   ins_cost(300);
17473   format %{ "call_leaf_nofp,runtime indirect " %}
17474   ins_encode %{
17475      __ call($target$$Register);
17476   %}
17477 
17478   ins_pipe(pipe_slow);
17479 %}
17480 
17481 // Call runtime without safepoint
17482 instruct CallLeafNoFPDirect(method meth)
17483 %{
17484   predicate(n->as_Call()->entry_point() != nullptr);
17485   match(CallLeafNoFP);
17486   effect(USE meth);
17487 
17488   ins_cost(300);
17489   format %{ "call_leaf_nofp,runtime " %}
17490   ins_encode(clear_avx, Java_To_Runtime(meth));
17491   ins_pipe(pipe_slow);
17492 %}
17493 
17494 // Return Instruction
17495 // Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
17498 instruct Ret()
17499 %{
17500   match(Return);
17501 
17502   format %{ "ret" %}
17503   ins_encode %{
17504     __ ret(0);
17505   %}
17506   ins_pipe(pipe_jmp);
17507 %}
17508 
17509 // Tail Call; Jump from runtime stub to Java code.
17510 // Also known as an 'interprocedural jump'.
17511 // Target of jump will eventually return to caller.
17512 // TailJump below removes the return address.
17513 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17514 // emitted just above the TailCall which has reset rbp to the caller state.
17515 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17516 %{
17517   match(TailCall jump_target method_ptr);
17518 
17519   ins_cost(300);
17520   format %{ "jmp     $jump_target\t# rbx holds method" %}
17521   ins_encode %{
17522     __ jmp($jump_target$$Register);
17523   %}
17524   ins_pipe(pipe_jmp);
17525 %}
17526 
17527 // Tail Jump; remove the return address; jump to target.
17528 // TailCall above leaves the return address around.
17529 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17530 %{
17531   match(TailJump jump_target ex_oop);
17532 
17533   ins_cost(300);
17534   format %{ "popq    rdx\t# pop return address\n\t"
17535             "jmp     $jump_target" %}
17536   ins_encode %{
17537     __ popq(as_Register(RDX_enc));
17538     __ jmp($jump_target$$Register);
17539   %}
17540   ins_pipe(pipe_jmp);
17541 %}
17542 
17543 // Forward exception.
17544 instruct ForwardExceptionjmp()
17545 %{
17546   match(ForwardException);
17547 
17548   format %{ "jmp     forward_exception_stub" %}
17549   ins_encode %{
17550     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17551   %}
17552   ins_pipe(pipe_jmp);
17553 %}
17554 
17555 // Create exception oop: created by stack-crawling runtime code.
17556 // Created exception is now available to this handler, and is setup
17557 // just prior to jumping to this handler.  No code emitted.
17558 instruct CreateException(rax_RegP ex_oop)
17559 %{
17560   match(Set ex_oop (CreateEx));
17561 
17562   size(0);
17563   // use the following format syntax
17564   format %{ "# exception oop is in rax; no code emitted" %}
17565   ins_encode();
17566   ins_pipe(empty);
17567 %}
17568 
17569 // Rethrow exception:
17570 // The exception oop will come in the first argument position.
17571 // Then JUMP (not call) to the rethrow stub code.
17572 instruct RethrowException()
17573 %{
17574   match(Rethrow);
17575 
17576   // use the following format syntax
17577   format %{ "jmp     rethrow_stub" %}
17578   ins_encode %{
17579     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17580   %}
17581   ins_pipe(pipe_jmp);
17582 %}
17583 
17584 // ============================================================================
17585 // This name is KNOWN by the ADLC and cannot be changed.
17586 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17587 // for this guy.
17588 instruct tlsLoadP(r15_RegP dst) %{
17589   match(Set dst (ThreadLocal));
17590   effect(DEF dst);
17591 
17592   size(0);
17593   format %{ "# TLS is in R15" %}
17594   ins_encode( /*empty encoding*/ );
17595   ins_pipe(ialu_reg_reg);
17596 %}
17597 
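// Scalar float/double arithmetic below comes in two flavors: the UseAVX == 0
// rules use the destructive two-operand SSE forms (dst op= src), while the
// UseAVX > 0 rules use the non-destructive three-operand VEX forms
// (dst = src1 op src2), which saves register-allocator copies.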
17598 instruct addF_reg(regF dst, regF src) %{
17599   predicate(UseAVX == 0);
17600   match(Set dst (AddF dst src));
17601 
17602   format %{ "addss   $dst, $src" %}
17603   ins_cost(150);
17604   ins_encode %{
17605     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17606   %}
17607   ins_pipe(pipe_slow);
17608 %}
17609 
17610 instruct addF_mem(regF dst, memory src) %{
17611   predicate(UseAVX == 0);
17612   match(Set dst (AddF dst (LoadF src)));
17613 
17614   format %{ "addss   $dst, $src" %}
17615   ins_cost(150);
17616   ins_encode %{
17617     __ addss($dst$$XMMRegister, $src$$Address);
17618   %}
17619   ins_pipe(pipe_slow);
17620 %}
17621 
17622 instruct addF_imm(regF dst, immF con) %{
17623   predicate(UseAVX == 0);
17624   match(Set dst (AddF dst con));
17625   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17626   ins_cost(150);
17627   ins_encode %{
17628     __ addss($dst$$XMMRegister, $constantaddress($con));
17629   %}
17630   ins_pipe(pipe_slow);
17631 %}
17632 
17633 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17634   predicate(UseAVX > 0);
17635   match(Set dst (AddF src1 src2));
17636 
17637   format %{ "vaddss  $dst, $src1, $src2" %}
17638   ins_cost(150);
17639   ins_encode %{
17640     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17641   %}
17642   ins_pipe(pipe_slow);
17643 %}
17644 
17645 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17646   predicate(UseAVX > 0);
17647   match(Set dst (AddF src1 (LoadF src2)));
17648 
17649   format %{ "vaddss  $dst, $src1, $src2" %}
17650   ins_cost(150);
17651   ins_encode %{
17652     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17653   %}
17654   ins_pipe(pipe_slow);
17655 %}
17656 
17657 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17658   predicate(UseAVX > 0);
17659   match(Set dst (AddF src con));
17660 
17661   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17662   ins_cost(150);
17663   ins_encode %{
17664     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17665   %}
17666   ins_pipe(pipe_slow);
17667 %}
17668 
17669 instruct addD_reg(regD dst, regD src) %{
17670   predicate(UseAVX == 0);
17671   match(Set dst (AddD dst src));
17672 
17673   format %{ "addsd   $dst, $src" %}
17674   ins_cost(150);
17675   ins_encode %{
17676     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17677   %}
17678   ins_pipe(pipe_slow);
17679 %}
17680 
17681 instruct addD_mem(regD dst, memory src) %{
17682   predicate(UseAVX == 0);
17683   match(Set dst (AddD dst (LoadD src)));
17684 
17685   format %{ "addsd   $dst, $src" %}
17686   ins_cost(150);
17687   ins_encode %{
17688     __ addsd($dst$$XMMRegister, $src$$Address);
17689   %}
17690   ins_pipe(pipe_slow);
17691 %}
17692 
17693 instruct addD_imm(regD dst, immD con) %{
17694   predicate(UseAVX == 0);
17695   match(Set dst (AddD dst con));
17696   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17697   ins_cost(150);
17698   ins_encode %{
17699     __ addsd($dst$$XMMRegister, $constantaddress($con));
17700   %}
17701   ins_pipe(pipe_slow);
17702 %}
17703 
17704 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17705   predicate(UseAVX > 0);
17706   match(Set dst (AddD src1 src2));
17707 
17708   format %{ "vaddsd  $dst, $src1, $src2" %}
17709   ins_cost(150);
17710   ins_encode %{
17711     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17712   %}
17713   ins_pipe(pipe_slow);
17714 %}
17715 
17716 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17717   predicate(UseAVX > 0);
17718   match(Set dst (AddD src1 (LoadD src2)));
17719 
17720   format %{ "vaddsd  $dst, $src1, $src2" %}
17721   ins_cost(150);
17722   ins_encode %{
17723     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17724   %}
17725   ins_pipe(pipe_slow);
17726 %}
17727 
17728 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17729   predicate(UseAVX > 0);
17730   match(Set dst (AddD src con));
17731 
17732   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17733   ins_cost(150);
17734   ins_encode %{
17735     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17736   %}
17737   ins_pipe(pipe_slow);
17738 %}
17739 
17740 instruct subF_reg(regF dst, regF src) %{
17741   predicate(UseAVX == 0);
17742   match(Set dst (SubF dst src));
17743 
17744   format %{ "subss   $dst, $src" %}
17745   ins_cost(150);
17746   ins_encode %{
17747     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17748   %}
17749   ins_pipe(pipe_slow);
17750 %}
17751 
17752 instruct subF_mem(regF dst, memory src) %{
17753   predicate(UseAVX == 0);
17754   match(Set dst (SubF dst (LoadF src)));
17755 
17756   format %{ "subss   $dst, $src" %}
17757   ins_cost(150);
17758   ins_encode %{
17759     __ subss($dst$$XMMRegister, $src$$Address);
17760   %}
17761   ins_pipe(pipe_slow);
17762 %}
17763 
17764 instruct subF_imm(regF dst, immF con) %{
17765   predicate(UseAVX == 0);
17766   match(Set dst (SubF dst con));
17767   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17768   ins_cost(150);
17769   ins_encode %{
17770     __ subss($dst$$XMMRegister, $constantaddress($con));
17771   %}
17772   ins_pipe(pipe_slow);
17773 %}
17774 
17775 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17776   predicate(UseAVX > 0);
17777   match(Set dst (SubF src1 src2));
17778 
17779   format %{ "vsubss  $dst, $src1, $src2" %}
17780   ins_cost(150);
17781   ins_encode %{
17782     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17783   %}
17784   ins_pipe(pipe_slow);
17785 %}
17786 
17787 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17788   predicate(UseAVX > 0);
17789   match(Set dst (SubF src1 (LoadF src2)));
17790 
17791   format %{ "vsubss  $dst, $src1, $src2" %}
17792   ins_cost(150);
17793   ins_encode %{
17794     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17795   %}
17796   ins_pipe(pipe_slow);
17797 %}
17798 
17799 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17800   predicate(UseAVX > 0);
17801   match(Set dst (SubF src con));
17802 
17803   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17804   ins_cost(150);
17805   ins_encode %{
17806     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17807   %}
17808   ins_pipe(pipe_slow);
17809 %}
17810 
17811 instruct subD_reg(regD dst, regD src) %{
17812   predicate(UseAVX == 0);
17813   match(Set dst (SubD dst src));
17814 
17815   format %{ "subsd   $dst, $src" %}
17816   ins_cost(150);
17817   ins_encode %{
17818     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17819   %}
17820   ins_pipe(pipe_slow);
17821 %}
17822 
17823 instruct subD_mem(regD dst, memory src) %{
17824   predicate(UseAVX == 0);
17825   match(Set dst (SubD dst (LoadD src)));
17826 
17827   format %{ "subsd   $dst, $src" %}
17828   ins_cost(150);
17829   ins_encode %{
17830     __ subsd($dst$$XMMRegister, $src$$Address);
17831   %}
17832   ins_pipe(pipe_slow);
17833 %}
17834 
17835 instruct subD_imm(regD dst, immD con) %{
17836   predicate(UseAVX == 0);
17837   match(Set dst (SubD dst con));
17838   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17839   ins_cost(150);
17840   ins_encode %{
17841     __ subsd($dst$$XMMRegister, $constantaddress($con));
17842   %}
17843   ins_pipe(pipe_slow);
17844 %}
17845 
17846 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17847   predicate(UseAVX > 0);
17848   match(Set dst (SubD src1 src2));
17849 
17850   format %{ "vsubsd  $dst, $src1, $src2" %}
17851   ins_cost(150);
17852   ins_encode %{
17853     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17854   %}
17855   ins_pipe(pipe_slow);
17856 %}
17857 
17858 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17859   predicate(UseAVX > 0);
17860   match(Set dst (SubD src1 (LoadD src2)));
17861 
17862   format %{ "vsubsd  $dst, $src1, $src2" %}
17863   ins_cost(150);
17864   ins_encode %{
17865     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17866   %}
17867   ins_pipe(pipe_slow);
17868 %}
17869 
17870 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17871   predicate(UseAVX > 0);
17872   match(Set dst (SubD src con));
17873 
17874   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17875   ins_cost(150);
17876   ins_encode %{
17877     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17878   %}
17879   ins_pipe(pipe_slow);
17880 %}
17881 
17882 instruct mulF_reg(regF dst, regF src) %{
17883   predicate(UseAVX == 0);
17884   match(Set dst (MulF dst src));
17885 
17886   format %{ "mulss   $dst, $src" %}
17887   ins_cost(150);
17888   ins_encode %{
17889     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17890   %}
17891   ins_pipe(pipe_slow);
17892 %}
17893 
17894 instruct mulF_mem(regF dst, memory src) %{
17895   predicate(UseAVX == 0);
17896   match(Set dst (MulF dst (LoadF src)));
17897 
17898   format %{ "mulss   $dst, $src" %}
17899   ins_cost(150);
17900   ins_encode %{
17901     __ mulss($dst$$XMMRegister, $src$$Address);
17902   %}
17903   ins_pipe(pipe_slow);
17904 %}
17905 
17906 instruct mulF_imm(regF dst, immF con) %{
17907   predicate(UseAVX == 0);
17908   match(Set dst (MulF dst con));
17909   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17910   ins_cost(150);
17911   ins_encode %{
17912     __ mulss($dst$$XMMRegister, $constantaddress($con));
17913   %}
17914   ins_pipe(pipe_slow);
17915 %}
17916 
17917 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17918   predicate(UseAVX > 0);
17919   match(Set dst (MulF src1 src2));
17920 
17921   format %{ "vmulss  $dst, $src1, $src2" %}
17922   ins_cost(150);
17923   ins_encode %{
17924     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17925   %}
17926   ins_pipe(pipe_slow);
17927 %}
17928 
17929 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17930   predicate(UseAVX > 0);
17931   match(Set dst (MulF src1 (LoadF src2)));
17932 
17933   format %{ "vmulss  $dst, $src1, $src2" %}
17934   ins_cost(150);
17935   ins_encode %{
17936     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17937   %}
17938   ins_pipe(pipe_slow);
17939 %}
17940 
17941 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17942   predicate(UseAVX > 0);
17943   match(Set dst (MulF src con));
17944 
17945   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17946   ins_cost(150);
17947   ins_encode %{
17948     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17949   %}
17950   ins_pipe(pipe_slow);
17951 %}
17952 
17953 instruct mulD_reg(regD dst, regD src) %{
17954   predicate(UseAVX == 0);
17955   match(Set dst (MulD dst src));
17956 
17957   format %{ "mulsd   $dst, $src" %}
17958   ins_cost(150);
17959   ins_encode %{
17960     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17961   %}
17962   ins_pipe(pipe_slow);
17963 %}
17964 
17965 instruct mulD_mem(regD dst, memory src) %{
17966   predicate(UseAVX == 0);
17967   match(Set dst (MulD dst (LoadD src)));
17968 
17969   format %{ "mulsd   $dst, $src" %}
17970   ins_cost(150);
17971   ins_encode %{
17972     __ mulsd($dst$$XMMRegister, $src$$Address);
17973   %}
17974   ins_pipe(pipe_slow);
17975 %}
17976 
17977 instruct mulD_imm(regD dst, immD con) %{
17978   predicate(UseAVX == 0);
17979   match(Set dst (MulD dst con));
17980   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17981   ins_cost(150);
17982   ins_encode %{
17983     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17984   %}
17985   ins_pipe(pipe_slow);
17986 %}
17987 
17988 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17989   predicate(UseAVX > 0);
17990   match(Set dst (MulD src1 src2));
17991 
17992   format %{ "vmulsd  $dst, $src1, $src2" %}
17993   ins_cost(150);
17994   ins_encode %{
17995     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17996   %}
17997   ins_pipe(pipe_slow);
17998 %}
17999 
18000 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18001   predicate(UseAVX > 0);
18002   match(Set dst (MulD src1 (LoadD src2)));
18003 
18004   format %{ "vmulsd  $dst, $src1, $src2" %}
18005   ins_cost(150);
18006   ins_encode %{
18007     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18008   %}
18009   ins_pipe(pipe_slow);
18010 %}
18011 
18012 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18013   predicate(UseAVX > 0);
18014   match(Set dst (MulD src con));
18015 
18016   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18017   ins_cost(150);
18018   ins_encode %{
18019     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18020   %}
18021   ins_pipe(pipe_slow);
18022 %}
18023 
18024 instruct divF_reg(regF dst, regF src) %{
18025   predicate(UseAVX == 0);
18026   match(Set dst (DivF dst src));
18027 
18028   format %{ "divss   $dst, $src" %}
18029   ins_cost(150);
18030   ins_encode %{
18031     __ divss($dst$$XMMRegister, $src$$XMMRegister);
18032   %}
18033   ins_pipe(pipe_slow);
18034 %}
18035 
18036 instruct divF_mem(regF dst, memory src) %{
18037   predicate(UseAVX == 0);
18038   match(Set dst (DivF dst (LoadF src)));
18039 
18040   format %{ "divss   $dst, $src" %}
18041   ins_cost(150);
18042   ins_encode %{
18043     __ divss($dst$$XMMRegister, $src$$Address);
18044   %}
18045   ins_pipe(pipe_slow);
18046 %}
18047 
18048 instruct divF_imm(regF dst, immF con) %{
18049   predicate(UseAVX == 0);
18050   match(Set dst (DivF dst con));
18051   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18052   ins_cost(150);
18053   ins_encode %{
18054     __ divss($dst$$XMMRegister, $constantaddress($con));
18055   %}
18056   ins_pipe(pipe_slow);
18057 %}
18058 
18059 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18060   predicate(UseAVX > 0);
18061   match(Set dst (DivF src1 src2));
18062 
18063   format %{ "vdivss  $dst, $src1, $src2" %}
18064   ins_cost(150);
18065   ins_encode %{
18066     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18067   %}
18068   ins_pipe(pipe_slow);
18069 %}
18070 
18071 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18072   predicate(UseAVX > 0);
18073   match(Set dst (DivF src1 (LoadF src2)));
18074 
18075   format %{ "vdivss  $dst, $src1, $src2" %}
18076   ins_cost(150);
18077   ins_encode %{
18078     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18079   %}
18080   ins_pipe(pipe_slow);
18081 %}
18082 
18083 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18084   predicate(UseAVX > 0);
18085   match(Set dst (DivF src con));
18086 
18087   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18088   ins_cost(150);
18089   ins_encode %{
18090     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18091   %}
18092   ins_pipe(pipe_slow);
18093 %}
18094 
18095 instruct divD_reg(regD dst, regD src) %{
18096   predicate(UseAVX == 0);
18097   match(Set dst (DivD dst src));
18098 
18099   format %{ "divsd   $dst, $src" %}
18100   ins_cost(150);
18101   ins_encode %{
18102     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18103   %}
18104   ins_pipe(pipe_slow);
18105 %}
18106 
18107 instruct divD_mem(regD dst, memory src) %{
18108   predicate(UseAVX == 0);
18109   match(Set dst (DivD dst (LoadD src)));
18110 
18111   format %{ "divsd   $dst, $src" %}
18112   ins_cost(150);
18113   ins_encode %{
18114     __ divsd($dst$$XMMRegister, $src$$Address);
18115   %}
18116   ins_pipe(pipe_slow);
18117 %}
18118 
18119 instruct divD_imm(regD dst, immD con) %{
18120   predicate(UseAVX == 0);
18121   match(Set dst (DivD dst con));
18122   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18123   ins_cost(150);
18124   ins_encode %{
18125     __ divsd($dst$$XMMRegister, $constantaddress($con));
18126   %}
18127   ins_pipe(pipe_slow);
18128 %}
18129 
18130 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18131   predicate(UseAVX > 0);
18132   match(Set dst (DivD src1 src2));
18133 
18134   format %{ "vdivsd  $dst, $src1, $src2" %}
18135   ins_cost(150);
18136   ins_encode %{
18137     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18138   %}
18139   ins_pipe(pipe_slow);
18140 %}
18141 
18142 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18143   predicate(UseAVX > 0);
18144   match(Set dst (DivD src1 (LoadD src2)));
18145 
18146   format %{ "vdivsd  $dst, $src1, $src2" %}
18147   ins_cost(150);
18148   ins_encode %{
18149     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18150   %}
18151   ins_pipe(pipe_slow);
18152 %}
18153 
18154 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18155   predicate(UseAVX > 0);
18156   match(Set dst (DivD src con));
18157 
18158   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18159   ins_cost(150);
18160   ins_encode %{
18161     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18162   %}
18163   ins_pipe(pipe_slow);
18164 %}
18165 
18166 instruct absF_reg(regF dst) %{
18167   predicate(UseAVX == 0);
18168   match(Set dst (AbsF dst));
18169   ins_cost(150);
18170   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18171   ins_encode %{
18172     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18173   %}
18174   ins_pipe(pipe_slow);
18175 %}
18176 
18177 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18178   predicate(UseAVX > 0);
18179   match(Set dst (AbsF src));
18180   ins_cost(150);
18181   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18182   ins_encode %{
18183     int vlen_enc = Assembler::AVX_128bit;
18184     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18185               ExternalAddress(float_signmask()), vlen_enc);
18186   %}
18187   ins_pipe(pipe_slow);
18188 %}
18189 
18190 instruct absD_reg(regD dst) %{
18191   predicate(UseAVX == 0);
18192   match(Set dst (AbsD dst));
18193   ins_cost(150);
18194   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18195             "# abs double by sign masking" %}
18196   ins_encode %{
18197     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18198   %}
18199   ins_pipe(pipe_slow);
18200 %}
18201 
18202 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18203   predicate(UseAVX > 0);
18204   match(Set dst (AbsD src));
18205   ins_cost(150);
18206   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18207             "# abs double by sign masking" %}
18208   ins_encode %{
18209     int vlen_enc = Assembler::AVX_128bit;
18210     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18211               ExternalAddress(double_signmask()), vlen_enc);
18212   %}
18213   ins_pipe(pipe_slow);
18214 %}
18215 
18216 instruct negF_reg(regF dst) %{
18217   predicate(UseAVX == 0);
18218   match(Set dst (NegF dst));
18219   ins_cost(150);
18220   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18221   ins_encode %{
18222     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18223   %}
18224   ins_pipe(pipe_slow);
18225 %}
18226 
18227 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18228   predicate(UseAVX > 0);
18229   match(Set dst (NegF src));
18230   ins_cost(150);
18231   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18232   ins_encode %{
18233     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18234                  ExternalAddress(float_signflip()));
18235   %}
18236   ins_pipe(pipe_slow);
18237 %}
18238 
18239 instruct negD_reg(regD dst) %{
18240   predicate(UseAVX == 0);
18241   match(Set dst (NegD dst));
18242   ins_cost(150);
18243   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18244             "# neg double by sign flipping" %}
18245   ins_encode %{
18246     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18247   %}
18248   ins_pipe(pipe_slow);
18249 %}
18250 
18251 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18252   predicate(UseAVX > 0);
18253   match(Set dst (NegD src));
18254   ins_cost(150);
18255   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18256             "# neg double by sign flipping" %}
18257   ins_encode %{
18258     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18259                  ExternalAddress(double_signflip()));
18260   %}
18261   ins_pipe(pipe_slow);
18262 %}
18263 
// sqrtss writes only the low 32 bits of its destination, leaving a dependency
// on the register's previous contents, so for best performance the destination
// should be pre-initialized with the input.  Therefore only the instruct rule
// where the input is pre-loaded into the dst register is defined below.
18266 instruct sqrtF_reg(regF dst) %{
18267   match(Set dst (SqrtF dst));
18268   format %{ "sqrtss  $dst, $dst" %}
18269   ins_encode %{
18270     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18271   %}
18272   ins_pipe(pipe_slow);
18273 %}
18274 
// sqrtsd writes only the low 64 bits of its destination, leaving a dependency
// on the register's previous contents, so for best performance the destination
// should be pre-initialized with the input.  Therefore only the instruct rule
// where the input is pre-loaded into the dst register is defined below.
18277 instruct sqrtD_reg(regD dst) %{
18278   match(Set dst (SqrtD dst));
18279   format %{ "sqrtsd  $dst, $dst" %}
18280   ins_encode %{
18281     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18282   %}
18283   ins_pipe(pipe_slow);
18284 %}
18285 
18286 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18287   effect(TEMP tmp);
18288   match(Set dst (ConvF2HF src));
18289   ins_cost(125);
18290   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18291   ins_encode %{
18292     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18293   %}
18294   ins_pipe( pipe_slow );
18295 %}
18296 
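// The store form below converts a single float and writes one 16-bit lane:
// the 0x1 mask loaded into $ktmp restricts the evcvtps2ph store to element 0.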
18297 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18298   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18299   effect(TEMP ktmp, TEMP rtmp);
18300   match(Set mem (StoreC mem (ConvF2HF src)));
18301   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18302   ins_encode %{
18303     __ movl($rtmp$$Register, 0x1);
18304     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18305     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18306   %}
18307   ins_pipe( pipe_slow );
18308 %}
18309 
18310 instruct vconvF2HF(vec dst, vec src) %{
18311   match(Set dst (VectorCastF2HF src));
18312   format %{ "vector_conv_F2HF $dst $src" %}
18313   ins_encode %{
18314     int vlen_enc = vector_length_encoding(this, $src);
18315     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18316   %}
18317   ins_pipe( pipe_slow );
18318 %}
18319 
18320 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18321   predicate(n->as_StoreVector()->memory_size() >= 16);
18322   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18323   format %{ "vcvtps2ph $mem,$src" %}
18324   ins_encode %{
18325     int vlen_enc = vector_length_encoding(this, $src);
18326     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18327   %}
18328   ins_pipe( pipe_slow );
18329 %}
18330 
18331 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18332   match(Set dst (ConvHF2F src));
18333   format %{ "vcvtph2ps $dst,$src" %}
18334   ins_encode %{
18335     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18336   %}
18337   ins_pipe( pipe_slow );
18338 %}
18339 
18340 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18341   match(Set dst (VectorCastHF2F (LoadVector mem)));
18342   format %{ "vcvtph2ps $dst,$mem" %}
18343   ins_encode %{
18344     int vlen_enc = vector_length_encoding(this);
18345     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18346   %}
18347   ins_pipe( pipe_slow );
18348 %}
18349 
18350 instruct vconvHF2F(vec dst, vec src) %{
18351   match(Set dst (VectorCastHF2F src));
18352   ins_cost(125);
18353   format %{ "vector_conv_HF2F $dst,$src" %}
18354   ins_encode %{
18355     int vlen_enc = vector_length_encoding(this);
18356     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18357   %}
18358   ins_pipe( pipe_slow );
18359 %}
18360 
18361 // ---------------------------------------- VectorReinterpret ------------------------------------
18362 instruct reinterpret_mask(kReg dst) %{
18363   predicate(n->bottom_type()->isa_vectmask() &&
18364             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18365   match(Set dst (VectorReinterpret dst));
18366   ins_cost(125);
18367   format %{ "vector_reinterpret $dst\t!" %}
18368   ins_encode %{
18369     // empty
18370   %}
18371   ins_pipe( pipe_slow );
18372 %}
18373 
18374 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18375   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18376             n->bottom_type()->isa_vectmask() &&
18377             n->in(1)->bottom_type()->isa_vectmask() &&
18378             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same overall size in bytes
18380   match(Set dst (VectorReinterpret src));
18381   effect(TEMP xtmp);
18382   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18383   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18390   %}
18391   ins_pipe( pipe_slow );
18392 %}
18393 
18394 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18395   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18396             n->bottom_type()->isa_vectmask() &&
18397             n->in(1)->bottom_type()->isa_vectmask() &&
18398             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18399              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same overall size in bytes
18401   match(Set dst (VectorReinterpret src));
18402   effect(TEMP xtmp);
18403   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18404   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18411   %}
18412   ins_pipe( pipe_slow );
18413 %}
18414 
18415 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18416   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18417             n->bottom_type()->isa_vectmask() &&
18418             n->in(1)->bottom_type()->isa_vectmask() &&
18419             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18420              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same overall size in bytes
18422   match(Set dst (VectorReinterpret src));
18423   effect(TEMP xtmp);
18424   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18425   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18432   %}
18433   ins_pipe( pipe_slow );
18434 %}
18435 
18436 instruct reinterpret(vec dst) %{
18437   predicate(!n->bottom_type()->isa_vectmask() &&
18438             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18439   match(Set dst (VectorReinterpret dst));
18440   ins_cost(125);
18441   format %{ "vector_reinterpret $dst\t!" %}
18442   ins_encode %{
18443     // empty
18444   %}
18445   ins_pipe( pipe_slow );
18446 %}
18447 
18448 instruct reinterpret_expand(vec dst, vec src) %{
18449   predicate(UseAVX == 0 &&
18450             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18451   match(Set dst (VectorReinterpret src));
18452   ins_cost(125);
18453   effect(TEMP dst);
18454   format %{ "vector_reinterpret_expand $dst,$src" %}
18455   ins_encode %{
18456     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18457     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18458 
18459     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18460     if (src_vlen_in_bytes == 4) {
18461       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18462     } else {
18463       assert(src_vlen_in_bytes == 8, "");
18464       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18465     }
18466     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18467   %}
18468   ins_pipe( pipe_slow );
18469 %}
18470 
18471 instruct vreinterpret_expand4(legVec dst, vec src) %{
18472   predicate(UseAVX > 0 &&
18473             !n->bottom_type()->isa_vectmask() &&
18474             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18475             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18476   match(Set dst (VectorReinterpret src));
18477   ins_cost(125);
18478   format %{ "vector_reinterpret_expand $dst,$src" %}
18479   ins_encode %{
18480     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18481   %}
18482   ins_pipe( pipe_slow );
18483 %}
18484 
18486 instruct vreinterpret_expand(legVec dst, vec src) %{
18487   predicate(UseAVX > 0 &&
18488             !n->bottom_type()->isa_vectmask() &&
18489             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18490             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18491   match(Set dst (VectorReinterpret src));
18492   ins_cost(125);
18493   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18494   ins_encode %{
18495     switch (Matcher::vector_length_in_bytes(this, $src)) {
18496       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18497       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18498       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18499       default: ShouldNotReachHere();
18500     }
18501   %}
18502   ins_pipe( pipe_slow );
18503 %}
18504 
18505 instruct reinterpret_shrink(vec dst, legVec src) %{
18506   predicate(!n->bottom_type()->isa_vectmask() &&
18507             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18508   match(Set dst (VectorReinterpret src));
18509   ins_cost(125);
18510   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18511   ins_encode %{
18512     switch (Matcher::vector_length_in_bytes(this)) {
18513       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18514       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18515       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18516       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18517       default: ShouldNotReachHere();
18518     }
18519   %}
18520   ins_pipe( pipe_slow );
18521 %}
18522 
18523 // ----------------------------------------------------------------------------------------------------
18524 
18525 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18526   match(Set dst (RoundDoubleMode src rmode));
18527   format %{ "roundsd $dst,$src" %}
18528   ins_cost(150);
18529   ins_encode %{
18530     assert(UseSSE >= 4, "required");
18531     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18532       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18533     }
18534     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18535   %}
18536   ins_pipe(pipe_slow);
18537 %}
18538 
18539 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18540   match(Set dst (RoundDoubleMode con rmode));
18541   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18542   ins_cost(150);
18543   ins_encode %{
18544     assert(UseSSE >= 4, "required");
18545     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18546   %}
18547   ins_pipe(pipe_slow);
18548 %}
18549 
18550 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18551   predicate(Matcher::vector_length(n) < 8);
18552   match(Set dst (RoundDoubleModeV src rmode));
18553   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18554   ins_encode %{
18555     assert(UseAVX > 0, "required");
18556     int vlen_enc = vector_length_encoding(this);
18557     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18558   %}
18559   ins_pipe( pipe_slow );
18560 %}
18561 
18562 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18563   predicate(Matcher::vector_length(n) == 8);
18564   match(Set dst (RoundDoubleModeV src rmode));
18565   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18566   ins_encode %{
18567     assert(UseAVX > 2, "required");
18568     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18569   %}
18570   ins_pipe( pipe_slow );
18571 %}
18572 
18573 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18574   predicate(Matcher::vector_length(n) < 8);
18575   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18576   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18577   ins_encode %{
18578     assert(UseAVX > 0, "required");
18579     int vlen_enc = vector_length_encoding(this);
18580     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18581   %}
18582   ins_pipe( pipe_slow );
18583 %}
18584 
18585 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18586   predicate(Matcher::vector_length(n) == 8);
18587   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18588   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18589   ins_encode %{
18590     assert(UseAVX > 2, "required");
18591     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18592   %}
18593   ins_pipe( pipe_slow );
18594 %}
18595 
18596 instruct onspinwait() %{
18597   match(OnSpinWait);
18598   ins_cost(200);
18599 
18600   format %{
18601     $$template
18602     $$emit$$"pause\t! membar_onspinwait"
18603   %}
18604   ins_encode %{
18605     __ pause();
18606   %}
18607   ins_pipe(pipe_slow);
18608 %}
18609 
18610 // a * b + c
18611 instruct fmaD_reg(regD a, regD b, regD c) %{
18612   match(Set c (FmaD  c (Binary a b)));
18613   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18614   ins_cost(150);
18615   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18617     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18618   %}
18619   ins_pipe( pipe_slow );
18620 %}
18621 
18622 // a * b + c
18623 instruct fmaF_reg(regF a, regF b, regF c) %{
18624   match(Set c (FmaF  c (Binary a b)));
18625   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18626   ins_cost(150);
18627   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18629     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18630   %}
18631   ins_pipe( pipe_slow );
18632 %}
18633 
18634 // ====================VECTOR INSTRUCTIONS=====================================
18635 
18636 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18637 instruct MoveVec2Leg(legVec dst, vec src) %{
18638   match(Set dst src);
18639   format %{ "" %}
18640   ins_encode %{
18641     ShouldNotReachHere();
18642   %}
18643   ins_pipe( fpu_reg_reg );
18644 %}
18645 
18646 instruct MoveLeg2Vec(vec dst, legVec src) %{
18647   match(Set dst src);
18648   format %{ "" %}
18649   ins_encode %{
18650     ShouldNotReachHere();
18651   %}
18652   ins_pipe( fpu_reg_reg );
18653 %}
18654 
18655 // ============================================================================
18656 
// Generic operand pattern for vector loads.
18658 instruct loadV(vec dst, memory mem) %{
18659   match(Set dst (LoadVector mem));
18660   ins_cost(125);
18661   format %{ "load_vector $dst,$mem" %}
18662   ins_encode %{
18663     BasicType bt = Matcher::vector_element_basic_type(this);
18664     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18665   %}
18666   ins_pipe( pipe_slow );
18667 %}
18668 
// Generic operand pattern for vector stores.
18670 instruct storeV(memory mem, vec src) %{
18671   match(Set mem (StoreVector mem src));
18672   ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18674   ins_encode %{
18675     switch (Matcher::vector_length_in_bytes(this, $src)) {
18676       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18677       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18678       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18679       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18680       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18681       default: ShouldNotReachHere();
18682     }
18683   %}
18684   ins_pipe( pipe_slow );
18685 %}
18686 
18687 // ---------------------------------------- Gather ------------------------------------
18688 
18689 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18690 
18691 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18692   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18693             Matcher::vector_length_in_bytes(n) <= 32);
18694   match(Set dst (LoadVectorGather mem idx));
18695   effect(TEMP dst, TEMP tmp, TEMP mask);
18696   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18697   ins_encode %{
18698     int vlen_enc = vector_length_encoding(this);
18699     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18700     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18701     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18702     __ lea($tmp$$Register, $mem$$Address);
18703     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18704   %}
18705   ins_pipe( pipe_slow );
18706 %}
18707 
18709 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18710   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18711             !is_subword_type(Matcher::vector_element_basic_type(n)));
18712   match(Set dst (LoadVectorGather mem idx));
18713   effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18715   ins_encode %{
18716     int vlen_enc = vector_length_encoding(this);
18717     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18718     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18719     __ lea($tmp$$Register, $mem$$Address);
18720     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18721   %}
18722   ins_pipe( pipe_slow );
18723 %}
18724 
18725 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18726   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18727             !is_subword_type(Matcher::vector_element_basic_type(n)));
18728   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18729   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18731   ins_encode %{
18732     assert(UseAVX > 2, "sanity");
18733     int vlen_enc = vector_length_encoding(this);
18734     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18735     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary.
18738     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18739     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18740     __ lea($tmp$$Register, $mem$$Address);
18741     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18742   %}
18743   ins_pipe( pipe_slow );
18744 %}
18745 
18746 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18747   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18748   match(Set dst (LoadVectorGather mem idx_base));
18749   effect(TEMP tmp, TEMP rtmp);
18750   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18751   ins_encode %{
18752     int vlen_enc = vector_length_encoding(this);
18753     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18754     __ lea($tmp$$Register, $mem$$Address);
18755     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18756   %}
18757   ins_pipe( pipe_slow );
18758 %}
18759 
18760 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18761                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18762   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18763   match(Set dst (LoadVectorGather mem idx_base));
18764   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18765   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18766   ins_encode %{
18767     int vlen_enc = vector_length_encoding(this);
18768     int vector_len = Matcher::vector_length(this);
18769     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18770     __ lea($tmp$$Register, $mem$$Address);
18771     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18772     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18773                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18774   %}
18775   ins_pipe( pipe_slow );
18776 %}
18777 
18778 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18779   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18780   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18781   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18782   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18783   ins_encode %{
18784     int vlen_enc = vector_length_encoding(this);
18785     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18786     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18787     __ lea($tmp$$Register, $mem$$Address);
18788     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18789     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18790   %}
18791   ins_pipe( pipe_slow );
18792 %}
18793 
18794 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18795                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18796   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18797   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18798   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18799   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18800   ins_encode %{
18801     int vlen_enc = vector_length_encoding(this);
18802     int vector_len = Matcher::vector_length(this);
18803     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18804     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18805     __ lea($tmp$$Register, $mem$$Address);
18806     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18807     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18808     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18809                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18810   %}
18811   ins_pipe( pipe_slow );
18812 %}
18813 
18814 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18815   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18816   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18817   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18818   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18819   ins_encode %{
18820     int vlen_enc = vector_length_encoding(this);
18821     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18822     __ lea($tmp$$Register, $mem$$Address);
18823     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18824     if (elem_bt == T_SHORT) {
18825       __ movl($mask_idx$$Register, 0x55555555);
18826       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18827     }
18828     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18829     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18830   %}
18831   ins_pipe( pipe_slow );
18832 %}
18833 
18834 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18835                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18836   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18837   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18838   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18839   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18840   ins_encode %{
18841     int vlen_enc = vector_length_encoding(this);
18842     int vector_len = Matcher::vector_length(this);
18843     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18844     __ lea($tmp$$Register, $mem$$Address);
18845     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18846     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18847     if (elem_bt == T_SHORT) {
18848       __ movl($mask_idx$$Register, 0x55555555);
18849       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18850     }
18851     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18852     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18853                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18854   %}
18855   ins_pipe( pipe_slow );
18856 %}
18857 
18858 // ====================Scatter=======================================
18859 
18860 // Scatter INT, LONG, FLOAT, DOUBLE
18861 
18862 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18863   predicate(UseAVX > 2);
18864   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18865   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18867   ins_encode %{
18868     int vlen_enc = vector_length_encoding(this, $src);
18869     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18870 
18871     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18872     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18873 
18874     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18875     __ lea($tmp$$Register, $mem$$Address);
18876     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18877   %}
18878   ins_pipe( pipe_slow );
18879 %}
18880 
18881 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18882   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18883   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $tmp and $ktmp as TEMP" %}
18885   ins_encode %{
18886     int vlen_enc = vector_length_encoding(this, $src);
18887     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18888     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18889     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary.
18892     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18893     __ lea($tmp$$Register, $mem$$Address);
18894     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18895   %}
18896   ins_pipe( pipe_slow );
18897 %}
18898 
18899 // ====================REPLICATE=======================================
18900 
// Replicate a byte scalar into a vector
18902 instruct vReplB_reg(vec dst, rRegI src) %{
18903   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18904   match(Set dst (Replicate src));
18905   format %{ "replicateB $dst,$src" %}
18906   ins_encode %{
18907     uint vlen = Matcher::vector_length(this);
18908     if (UseAVX >= 2) {
18909       int vlen_enc = vector_length_encoding(this);
18910       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18911         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18912         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18913       } else {
18914         __ movdl($dst$$XMMRegister, $src$$Register);
18915         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18916       }
18917     } else {
      assert(UseAVX < 2, "");
18919       __ movdl($dst$$XMMRegister, $src$$Register);
18920       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18921       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18922       if (vlen >= 16) {
18923         assert(vlen == 16, "");
18924         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18925       }
18926     }
18927   %}
18928   ins_pipe( pipe_slow );
18929 %}
18930 
18931 instruct ReplB_mem(vec dst, memory mem) %{
18932   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18933   match(Set dst (Replicate (LoadB mem)));
18934   format %{ "replicateB $dst,$mem" %}
18935   ins_encode %{
18936     int vlen_enc = vector_length_encoding(this);
18937     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18938   %}
18939   ins_pipe( pipe_slow );
18940 %}
18941 
18942 // ====================ReplicateS=======================================
18943 
18944 instruct vReplS_reg(vec dst, rRegI src) %{
18945   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18946   match(Set dst (Replicate src));
18947   format %{ "replicateS $dst,$src" %}
18948   ins_encode %{
18949     uint vlen = Matcher::vector_length(this);
18950     int vlen_enc = vector_length_encoding(this);
18951     if (UseAVX >= 2) {
18952       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18953         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18954         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18955       } else {
18956         __ movdl($dst$$XMMRegister, $src$$Register);
18957         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18958       }
18959     } else {
18960       assert(UseAVX < 2, "");
18961       __ movdl($dst$$XMMRegister, $src$$Register);
18962       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18963       if (vlen >= 8) {
18964         assert(vlen == 8, "");
18965         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18966       }
18967     }
18968   %}
18969   ins_pipe( pipe_slow );
18970 %}
18971 
18972 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18973   match(Set dst (Replicate con));
18974   effect(TEMP rtmp);
18975   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18976   ins_encode %{
18977     int vlen_enc = vector_length_encoding(this);
18978     BasicType bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "required");
18980     __ movl($rtmp$$Register, $con$$constant);
18981     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18982   %}
18983   ins_pipe( pipe_slow );
18984 %}
18985 
18986 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18987   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18988   match(Set dst (Replicate src));
18989   effect(TEMP rtmp);
18990   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18991   ins_encode %{
18992     int vlen_enc = vector_length_encoding(this);
18993     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18994     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18995   %}
18996   ins_pipe( pipe_slow );
18997 %}
18998 
18999 instruct ReplS_mem(vec dst, memory mem) %{
19000   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19001   match(Set dst (Replicate (LoadS mem)));
19002   format %{ "replicateS $dst,$mem" %}
19003   ins_encode %{
19004     int vlen_enc = vector_length_encoding(this);
19005     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19006   %}
19007   ins_pipe( pipe_slow );
19008 %}
19009 
19010 // ====================ReplicateI=======================================
19011 
19012 instruct ReplI_reg(vec dst, rRegI src) %{
19013   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19014   match(Set dst (Replicate src));
19015   format %{ "replicateI $dst,$src" %}
19016   ins_encode %{
19017     uint vlen = Matcher::vector_length(this);
19018     int vlen_enc = vector_length_encoding(this);
19019     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19020       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19021     } else if (VM_Version::supports_avx2()) {
19022       __ movdl($dst$$XMMRegister, $src$$Register);
19023       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19024     } else {
19025       __ movdl($dst$$XMMRegister, $src$$Register);
19026       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19027     }
19028   %}
19029   ins_pipe( pipe_slow );
19030 %}
19031 
19032 instruct ReplI_mem(vec dst, memory mem) %{
19033   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19034   match(Set dst (Replicate (LoadI mem)));
19035   format %{ "replicateI $dst,$mem" %}
19036   ins_encode %{
19037     int vlen_enc = vector_length_encoding(this);
19038     if (VM_Version::supports_avx2()) {
19039       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19040     } else if (VM_Version::supports_avx()) {
19041       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19042     } else {
19043       __ movdl($dst$$XMMRegister, $mem$$Address);
19044       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19045     }
19046   %}
19047   ins_pipe( pipe_slow );
19048 %}
19049 
19050 instruct ReplI_imm(vec dst, immI con) %{
19051   predicate(Matcher::is_non_long_integral_vector(n));
19052   match(Set dst (Replicate con));
19053   format %{ "replicateI $dst,$con" %}
19054   ins_encode %{
19055     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19056                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19057                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
19058     BasicType bt = Matcher::vector_element_basic_type(this);
19059     int vlen = Matcher::vector_length_in_bytes(this);
19060     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19061   %}
19062   ins_pipe( pipe_slow );
19063 %}
19064 
// Replicate a scalar zero into a vector
19066 instruct ReplI_zero(vec dst, immI_0 zero) %{
19067   predicate(Matcher::is_non_long_integral_vector(n));
19068   match(Set dst (Replicate zero));
19069   format %{ "replicateI $dst,$zero" %}
19070   ins_encode %{
19071     int vlen_enc = vector_length_encoding(this);
19072     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19073       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19074     } else {
19075       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19076     }
19077   %}
19078   ins_pipe( fpu_reg_reg );
19079 %}
19080 
19081 instruct ReplI_M1(vec dst, immI_M1 con) %{
19082   predicate(Matcher::is_non_long_integral_vector(n));
19083   match(Set dst (Replicate con));
19084   format %{ "vallones $dst" %}
19085   ins_encode %{
19086     int vector_len = vector_length_encoding(this);
19087     __ vallones($dst$$XMMRegister, vector_len);
19088   %}
19089   ins_pipe( pipe_slow );
19090 %}
19091 
19092 // ====================ReplicateL=======================================
19093 
// Replicate a long (8-byte) scalar into a vector
19095 instruct ReplL_reg(vec dst, rRegL src) %{
19096   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19097   match(Set dst (Replicate src));
19098   format %{ "replicateL $dst,$src" %}
19099   ins_encode %{
19100     int vlen = Matcher::vector_length(this);
19101     int vlen_enc = vector_length_encoding(this);
19102     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19103       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19104     } else if (VM_Version::supports_avx2()) {
19105       __ movdq($dst$$XMMRegister, $src$$Register);
19106       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19107     } else {
19108       __ movdq($dst$$XMMRegister, $src$$Register);
19109       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19110     }
19111   %}
19112   ins_pipe( pipe_slow );
19113 %}
19114 
19115 instruct ReplL_mem(vec dst, memory mem) %{
19116   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19117   match(Set dst (Replicate (LoadL mem)));
19118   format %{ "replicateL $dst,$mem" %}
19119   ins_encode %{
19120     int vlen_enc = vector_length_encoding(this);
19121     if (VM_Version::supports_avx2()) {
19122       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19123     } else if (VM_Version::supports_sse3()) {
19124       __ movddup($dst$$XMMRegister, $mem$$Address);
19125     } else {
19126       __ movq($dst$$XMMRegister, $mem$$Address);
19127       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19128     }
19129   %}
19130   ins_pipe( pipe_slow );
19131 %}
19132 
// Replicate a long (8-byte) scalar immediate into a vector by loading it from the constant table.
19134 instruct ReplL_imm(vec dst, immL con) %{
19135   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19136   match(Set dst (Replicate con));
19137   format %{ "replicateL $dst,$con" %}
19138   ins_encode %{
19139     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19140     int vlen = Matcher::vector_length_in_bytes(this);
19141     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19142   %}
19143   ins_pipe( pipe_slow );
19144 %}
19145 
19146 instruct ReplL_zero(vec dst, immL0 zero) %{
19147   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19148   match(Set dst (Replicate zero));
19149   format %{ "replicateL $dst,$zero" %}
19150   ins_encode %{
19151     int vlen_enc = vector_length_encoding(this);
19152     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19153       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19154     } else {
19155       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19156     }
19157   %}
19158   ins_pipe( fpu_reg_reg );
19159 %}
19160 
19161 instruct ReplL_M1(vec dst, immL_M1 con) %{
19162   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19163   match(Set dst (Replicate con));
19164   format %{ "vallones $dst" %}
19165   ins_encode %{
19166     int vector_len = vector_length_encoding(this);
19167     __ vallones($dst$$XMMRegister, vector_len);
19168   %}
19169   ins_pipe( pipe_slow );
19170 %}
19171 
19172 // ====================ReplicateF=======================================
19173 
19174 instruct vReplF_reg(vec dst, vlRegF src) %{
19175   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19176   match(Set dst (Replicate src));
19177   format %{ "replicateF $dst,$src" %}
19178   ins_encode %{
19179     uint vlen = Matcher::vector_length(this);
19180     int vlen_enc = vector_length_encoding(this);
19181     if (vlen <= 4) {
19182       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19183     } else if (VM_Version::supports_avx2()) {
19184       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19185     } else {
19186       assert(vlen == 8, "sanity");
19187       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19188       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19189     }
19190   %}
19191   ins_pipe( pipe_slow );
19192 %}
19193 
19194 instruct ReplF_reg(vec dst, vlRegF src) %{
19195   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19196   match(Set dst (Replicate src));
19197   format %{ "replicateF $dst,$src" %}
19198   ins_encode %{
19199     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19200   %}
19201   ins_pipe( pipe_slow );
19202 %}
19203 
19204 instruct ReplF_mem(vec dst, memory mem) %{
19205   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19206   match(Set dst (Replicate (LoadF mem)));
19207   format %{ "replicateF $dst,$mem" %}
19208   ins_encode %{
19209     int vlen_enc = vector_length_encoding(this);
19210     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19211   %}
19212   ins_pipe( pipe_slow );
19213 %}
19214 
// Replicate a float scalar immediate into a vector by loading it from the constant table.
19216 instruct ReplF_imm(vec dst, immF con) %{
19217   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19218   match(Set dst (Replicate con));
19219   format %{ "replicateF $dst,$con" %}
19220   ins_encode %{
19221     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19222                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19223     int vlen = Matcher::vector_length_in_bytes(this);
19224     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19225   %}
19226   ins_pipe( pipe_slow );
19227 %}
19228 
19229 instruct ReplF_zero(vec dst, immF0 zero) %{
19230   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19231   match(Set dst (Replicate zero));
19232   format %{ "replicateF $dst,$zero" %}
19233   ins_encode %{
19234     int vlen_enc = vector_length_encoding(this);
19235     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19236       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19237     } else {
19238       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19239     }
19240   %}
19241   ins_pipe( fpu_reg_reg );
19242 %}
19243 
19244 // ====================ReplicateD=======================================
19245 
// Replicate a double (8-byte) scalar into a vector
19247 instruct vReplD_reg(vec dst, vlRegD src) %{
19248   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19249   match(Set dst (Replicate src));
19250   format %{ "replicateD $dst,$src" %}
19251   ins_encode %{
19252     uint vlen = Matcher::vector_length(this);
19253     int vlen_enc = vector_length_encoding(this);
19254     if (vlen <= 2) {
19255       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19256     } else if (VM_Version::supports_avx2()) {
19257       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19258     } else {
19259       assert(vlen == 4, "sanity");
19260       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19261       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19262     }
19263   %}
19264   ins_pipe( pipe_slow );
19265 %}
19266 
19267 instruct ReplD_reg(vec dst, vlRegD src) %{
19268   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19269   match(Set dst (Replicate src));
19270   format %{ "replicateD $dst,$src" %}
19271   ins_encode %{
19272     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19273   %}
19274   ins_pipe( pipe_slow );
19275 %}
19276 
19277 instruct ReplD_mem(vec dst, memory mem) %{
19278   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19279   match(Set dst (Replicate (LoadD mem)));
19280   format %{ "replicateD $dst,$mem" %}
19281   ins_encode %{
19282     if (Matcher::vector_length(this) >= 4) {
19283       int vlen_enc = vector_length_encoding(this);
19284       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19285     } else {
19286       __ movddup($dst$$XMMRegister, $mem$$Address);
19287     }
19288   %}
19289   ins_pipe( pipe_slow );
19290 %}
19291 
// Replicate a double (8-byte) scalar immediate into a vector by loading it from the constant table.
19293 instruct ReplD_imm(vec dst, immD con) %{
19294   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19295   match(Set dst (Replicate con));
19296   format %{ "replicateD $dst,$con" %}
19297   ins_encode %{
19298     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19299     int vlen = Matcher::vector_length_in_bytes(this);
19300     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19301   %}
19302   ins_pipe( pipe_slow );
19303 %}
19304 
19305 instruct ReplD_zero(vec dst, immD0 zero) %{
19306   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19307   match(Set dst (Replicate zero));
19308   format %{ "replicateD $dst,$zero" %}
19309   ins_encode %{
19310     int vlen_enc = vector_length_encoding(this);
19311     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19312       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19313     } else {
19314       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19315     }
19316   %}
19317   ins_pipe( fpu_reg_reg );
19318 %}
19319 
19320 // ====================VECTOR INSERT=======================================
19321 
19322 instruct insert(vec dst, rRegI val, immU8 idx) %{
19323   predicate(Matcher::vector_length_in_bytes(n) < 32);
19324   match(Set dst (VectorInsert (Binary dst val) idx));
19325   format %{ "vector_insert $dst,$val,$idx" %}
19326   ins_encode %{
19327     assert(UseSSE >= 4, "required");
19328     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19329 
19330     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19331 
19332     assert(is_integral_type(elem_bt), "");
19333     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19334 
19335     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19336   %}
19337   ins_pipe( pipe_slow );
19338 %}
19339 
19340 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19341   predicate(Matcher::vector_length_in_bytes(n) == 32);
19342   match(Set dst (VectorInsert (Binary src val) idx));
19343   effect(TEMP vtmp);
19344   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19345   ins_encode %{
19346     int vlen_enc = Assembler::AVX_256bit;
19347     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19348     int elem_per_lane = 16/type2aelembytes(elem_bt);
19349     int log2epr = log2(elem_per_lane);
19350 
19351     assert(is_integral_type(elem_bt), "sanity");
19352     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19353 
19354     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19355     uint y_idx = ($idx$$constant >> log2epr) & 1;
19356     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19357     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19358     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19359   %}
19360   ins_pipe( pipe_slow );
19361 %}
19362 
19363 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19364   predicate(Matcher::vector_length_in_bytes(n) == 64);
19365   match(Set dst (VectorInsert (Binary src val) idx));
19366   effect(TEMP vtmp);
19367   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19368   ins_encode %{
19369     assert(UseAVX > 2, "sanity");
19370 
19371     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19372     int elem_per_lane = 16/type2aelembytes(elem_bt);
19373     int log2epr = log2(elem_per_lane);
19374 
19375     assert(is_integral_type(elem_bt), "");
19376     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19377 
19378     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19379     uint y_idx = ($idx$$constant >> log2epr) & 3;
19380     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19381     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19382     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19383   %}
19384   ins_pipe( pipe_slow );
19385 %}
19386 
19387 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19388   predicate(Matcher::vector_length(n) == 2);
19389   match(Set dst (VectorInsert (Binary dst val) idx));
19390   format %{ "vector_insert $dst,$val,$idx" %}
19391   ins_encode %{
19392     assert(UseSSE >= 4, "required");
19393     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19394     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19395 
19396     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19397   %}
19398   ins_pipe( pipe_slow );
19399 %}
19400 
19401 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19402   predicate(Matcher::vector_length(n) == 4);
19403   match(Set dst (VectorInsert (Binary src val) idx));
19404   effect(TEMP vtmp);
19405   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19406   ins_encode %{
19407     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19408     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19409 
19410     uint x_idx = $idx$$constant & right_n_bits(1);
19411     uint y_idx = ($idx$$constant >> 1) & 1;
19412     int vlen_enc = Assembler::AVX_256bit;
19413     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19414     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19415     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19416   %}
19417   ins_pipe( pipe_slow );
19418 %}
19419 
19420 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19421   predicate(Matcher::vector_length(n) == 8);
19422   match(Set dst (VectorInsert (Binary src val) idx));
19423   effect(TEMP vtmp);
19424   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19425   ins_encode %{
19426     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19427     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19428 
19429     uint x_idx = $idx$$constant & right_n_bits(1);
19430     uint y_idx = ($idx$$constant >> 1) & 3;
19431     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19432     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19433     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19434   %}
19435   ins_pipe( pipe_slow );
19436 %}
19437 
19438 instruct insertF(vec dst, regF val, immU8 idx) %{
19439   predicate(Matcher::vector_length(n) < 8);
19440   match(Set dst (VectorInsert (Binary dst val) idx));
19441   format %{ "vector_insert $dst,$val,$idx" %}
19442   ins_encode %{
19443     assert(UseSSE >= 4, "sanity");
19444 
19445     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19446     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19447 
19448     uint x_idx = $idx$$constant & right_n_bits(2);
19449     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19450   %}
19451   ins_pipe( pipe_slow );
19452 %}
19453 
19454 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19455   predicate(Matcher::vector_length(n) >= 8);
19456   match(Set dst (VectorInsert (Binary src val) idx));
19457   effect(TEMP vtmp);
19458   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19459   ins_encode %{
19460     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19461     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19462 
19463     int vlen = Matcher::vector_length(this);
19464     uint x_idx = $idx$$constant & right_n_bits(2);
19465     if (vlen == 8) {
19466       uint y_idx = ($idx$$constant >> 2) & 1;
19467       int vlen_enc = Assembler::AVX_256bit;
19468       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19469       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19470       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19471     } else {
19472       assert(vlen == 16, "sanity");
19473       uint y_idx = ($idx$$constant >> 2) & 3;
19474       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19475       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19476       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19477     }
19478   %}
19479   ins_pipe( pipe_slow );
19480 %}
19481 
19482 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19483   predicate(Matcher::vector_length(n) == 2);
19484   match(Set dst (VectorInsert (Binary dst val) idx));
19485   effect(TEMP tmp);
19486   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19487   ins_encode %{
19488     assert(UseSSE >= 4, "sanity");
19489     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19490     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19491 
19492     __ movq($tmp$$Register, $val$$XMMRegister);
19493     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19494   %}
19495   ins_pipe( pipe_slow );
19496 %}
19497 
19498 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19499   predicate(Matcher::vector_length(n) == 4);
19500   match(Set dst (VectorInsert (Binary src val) idx));
19501   effect(TEMP vtmp, TEMP tmp);
19502   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19503   ins_encode %{
19504     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19505     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19506 
19507     uint x_idx = $idx$$constant & right_n_bits(1);
19508     uint y_idx = ($idx$$constant >> 1) & 1;
19509     int vlen_enc = Assembler::AVX_256bit;
19510     __ movq($tmp$$Register, $val$$XMMRegister);
19511     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19512     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19513     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19514   %}
19515   ins_pipe( pipe_slow );
19516 %}
19517 
19518 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19519   predicate(Matcher::vector_length(n) == 8);
19520   match(Set dst (VectorInsert (Binary src val) idx));
19521   effect(TEMP tmp, TEMP vtmp);
19522   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19523   ins_encode %{
19524     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19525     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19526 
19527     uint x_idx = $idx$$constant & right_n_bits(1);
19528     uint y_idx = ($idx$$constant >> 1) & 3;
19529     __ movq($tmp$$Register, $val$$XMMRegister);
19530     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19531     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19532     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19533   %}
19534   ins_pipe( pipe_slow );
19535 %}
19536 
19537 // ====================REDUCTION ARITHMETIC=======================================
19538 
19539 // =======================Int Reduction==========================================
19540 
19541 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19542   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19543   match(Set dst (AddReductionVI src1 src2));
19544   match(Set dst (MulReductionVI src1 src2));
19545   match(Set dst (AndReductionV  src1 src2));
19546   match(Set dst ( OrReductionV  src1 src2));
19547   match(Set dst (XorReductionV  src1 src2));
19548   match(Set dst (MinReductionV  src1 src2));
19549   match(Set dst (MaxReductionV  src1 src2));
19550   effect(TEMP vtmp1, TEMP vtmp2);
19551   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19552   ins_encode %{
19553     int opcode = this->ideal_Opcode();
19554     int vlen = Matcher::vector_length(this, $src2);
19555     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19556   %}
19557   ins_pipe( pipe_slow );
19558 %}
19559 
19560 // =======================Long Reduction==========================================
19561 
19562 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19563   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19564   match(Set dst (AddReductionVL src1 src2));
19565   match(Set dst (MulReductionVL src1 src2));
19566   match(Set dst (AndReductionV  src1 src2));
19567   match(Set dst ( OrReductionV  src1 src2));
19568   match(Set dst (XorReductionV  src1 src2));
19569   match(Set dst (MinReductionV  src1 src2));
19570   match(Set dst (MaxReductionV  src1 src2));
19571   effect(TEMP vtmp1, TEMP vtmp2);
19572   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19573   ins_encode %{
19574     int opcode = this->ideal_Opcode();
19575     int vlen = Matcher::vector_length(this, $src2);
19576     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19577   %}
19578   ins_pipe( pipe_slow );
19579 %}
19580 
19581 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19582   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19583   match(Set dst (AddReductionVL src1 src2));
19584   match(Set dst (MulReductionVL src1 src2));
19585   match(Set dst (AndReductionV  src1 src2));
19586   match(Set dst ( OrReductionV  src1 src2));
19587   match(Set dst (XorReductionV  src1 src2));
19588   match(Set dst (MinReductionV  src1 src2));
19589   match(Set dst (MaxReductionV  src1 src2));
19590   effect(TEMP vtmp1, TEMP vtmp2);
19591   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19592   ins_encode %{
19593     int opcode = this->ideal_Opcode();
19594     int vlen = Matcher::vector_length(this, $src2);
19595     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19596   %}
19597   ins_pipe( pipe_slow );
19598 %}
19599 
19600 // =======================Float Reduction==========================================
19601 
19602 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19603   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19604   match(Set dst (AddReductionVF dst src));
19605   match(Set dst (MulReductionVF dst src));
19606   effect(TEMP dst, TEMP vtmp);
19607   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19608   ins_encode %{
19609     int opcode = this->ideal_Opcode();
19610     int vlen = Matcher::vector_length(this, $src);
19611     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19612   %}
19613   ins_pipe( pipe_slow );
19614 %}
19615 
19616 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19617   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19618   match(Set dst (AddReductionVF dst src));
19619   match(Set dst (MulReductionVF dst src));
19620   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19621   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19622   ins_encode %{
19623     int opcode = this->ideal_Opcode();
19624     int vlen = Matcher::vector_length(this, $src);
19625     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19626   %}
19627   ins_pipe( pipe_slow );
19628 %}
19629 
19630 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19631   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19632   match(Set dst (AddReductionVF dst src));
19633   match(Set dst (MulReductionVF dst src));
19634   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19635   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19636   ins_encode %{
19637     int opcode = this->ideal_Opcode();
19638     int vlen = Matcher::vector_length(this, $src);
19639     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19640   %}
19641   ins_pipe( pipe_slow );
19642 %}
19643 
19644 
19645 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19646   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19647   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19648   // src1 contains reduction identity
19649   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19650   match(Set dst (AddReductionVF src1 src2));
19651   match(Set dst (MulReductionVF src1 src2));
19652   effect(TEMP dst);
19653   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19654   ins_encode %{
19655     int opcode = this->ideal_Opcode();
19656     int vlen = Matcher::vector_length(this, $src2);
19657     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19658   %}
19659   ins_pipe( pipe_slow );
19660 %}
19661 
19662 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19663   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19664   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19665   // src1 contains reduction identity
19666   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19667   match(Set dst (AddReductionVF src1 src2));
19668   match(Set dst (MulReductionVF src1 src2));
19669   effect(TEMP dst, TEMP vtmp);
19670   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19671   ins_encode %{
19672     int opcode = this->ideal_Opcode();
19673     int vlen = Matcher::vector_length(this, $src2);
19674     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19675   %}
19676   ins_pipe( pipe_slow );
19677 %}
19678 
19679 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19680   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19681   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19682   // src1 contains reduction identity
19683   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19684   match(Set dst (AddReductionVF src1 src2));
19685   match(Set dst (MulReductionVF src1 src2));
19686   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19687   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19688   ins_encode %{
19689     int opcode = this->ideal_Opcode();
19690     int vlen = Matcher::vector_length(this, $src2);
19691     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19692   %}
19693   ins_pipe( pipe_slow );
19694 %}
19695 
19696 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19697   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19698   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19699   // src1 contains reduction identity
19700   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19701   match(Set dst (AddReductionVF src1 src2));
19702   match(Set dst (MulReductionVF src1 src2));
19703   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19704   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19705   ins_encode %{
19706     int opcode = this->ideal_Opcode();
19707     int vlen = Matcher::vector_length(this, $src2);
19708     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19709   %}
19710   ins_pipe( pipe_slow );
19711 %}
19712 
19713 // =======================Double Reduction==========================================
19714 
19715 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19716   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19717   match(Set dst (AddReductionVD dst src));
19718   match(Set dst (MulReductionVD dst src));
19719   effect(TEMP dst, TEMP vtmp);
19720   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19721   ins_encode %{
19722     int opcode = this->ideal_Opcode();
19723     int vlen = Matcher::vector_length(this, $src);
19724     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19725 %}
19726   ins_pipe( pipe_slow );
19727 %}
19728 
19729 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19730   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19731   match(Set dst (AddReductionVD dst src));
19732   match(Set dst (MulReductionVD dst src));
19733   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19734   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19735   ins_encode %{
19736     int opcode = this->ideal_Opcode();
19737     int vlen = Matcher::vector_length(this, $src);
19738     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19739   %}
19740   ins_pipe( pipe_slow );
19741 %}
19742 
19743 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19744   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19745   match(Set dst (AddReductionVD dst src));
19746   match(Set dst (MulReductionVD dst src));
19747   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19748   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19749   ins_encode %{
19750     int opcode = this->ideal_Opcode();
19751     int vlen = Matcher::vector_length(this, $src);
19752     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19753   %}
19754   ins_pipe( pipe_slow );
19755 %}
19756 
19757 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19758   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19759   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19760   // src1 contains reduction identity
19761   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19762   match(Set dst (AddReductionVD src1 src2));
19763   match(Set dst (MulReductionVD src1 src2));
19764   effect(TEMP dst);
19765   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19766   ins_encode %{
19767     int opcode = this->ideal_Opcode();
19768     int vlen = Matcher::vector_length(this, $src2);
19769     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19770 %}
19771   ins_pipe( pipe_slow );
19772 %}
19773 
19774 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19775   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19776   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19777   // src1 contains reduction identity
19778   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19779   match(Set dst (AddReductionVD src1 src2));
19780   match(Set dst (MulReductionVD src1 src2));
19781   effect(TEMP dst, TEMP vtmp);
19782   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19783   ins_encode %{
19784     int opcode = this->ideal_Opcode();
19785     int vlen = Matcher::vector_length(this, $src2);
19786     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19787   %}
19788   ins_pipe( pipe_slow );
19789 %}
19790 
19791 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19792   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19793   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19794   // src1 contains reduction identity
19795   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19796   match(Set dst (AddReductionVD src1 src2));
19797   match(Set dst (MulReductionVD src1 src2));
19798   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19799   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19800   ins_encode %{
19801     int opcode = this->ideal_Opcode();
19802     int vlen = Matcher::vector_length(this, $src2);
19803     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19804   %}
19805   ins_pipe( pipe_slow );
19806 %}
19807 
19808 // =======================Byte Reduction==========================================
19809 
19810 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19811   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19812   match(Set dst (AddReductionVI src1 src2));
19813   match(Set dst (AndReductionV  src1 src2));
19814   match(Set dst ( OrReductionV  src1 src2));
19815   match(Set dst (XorReductionV  src1 src2));
19816   match(Set dst (MinReductionV  src1 src2));
19817   match(Set dst (MaxReductionV  src1 src2));
19818   effect(TEMP vtmp1, TEMP vtmp2);
19819   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19820   ins_encode %{
19821     int opcode = this->ideal_Opcode();
19822     int vlen = Matcher::vector_length(this, $src2);
19823     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19824   %}
19825   ins_pipe( pipe_slow );
19826 %}
19827 
19828 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19829   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19830   match(Set dst (AddReductionVI src1 src2));
19831   match(Set dst (AndReductionV  src1 src2));
19832   match(Set dst ( OrReductionV  src1 src2));
19833   match(Set dst (XorReductionV  src1 src2));
19834   match(Set dst (MinReductionV  src1 src2));
19835   match(Set dst (MaxReductionV  src1 src2));
19836   effect(TEMP vtmp1, TEMP vtmp2);
19837   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19838   ins_encode %{
19839     int opcode = this->ideal_Opcode();
19840     int vlen = Matcher::vector_length(this, $src2);
19841     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19842   %}
19843   ins_pipe( pipe_slow );
19844 %}
19845 
19846 // =======================Short Reduction==========================================
19847 
19848 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19849   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19850   match(Set dst (AddReductionVI src1 src2));
19851   match(Set dst (MulReductionVI src1 src2));
19852   match(Set dst (AndReductionV  src1 src2));
19853   match(Set dst ( OrReductionV  src1 src2));
19854   match(Set dst (XorReductionV  src1 src2));
19855   match(Set dst (MinReductionV  src1 src2));
19856   match(Set dst (MaxReductionV  src1 src2));
19857   effect(TEMP vtmp1, TEMP vtmp2);
19858   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19859   ins_encode %{
19860     int opcode = this->ideal_Opcode();
19861     int vlen = Matcher::vector_length(this, $src2);
19862     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19863   %}
19864   ins_pipe( pipe_slow );
19865 %}
19866 
19867 // =======================Mul Reduction==========================================
19868 
19869 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19870   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19871             Matcher::vector_length(n->in(2)) <= 32); // src2
19872   match(Set dst (MulReductionVI src1 src2));
19873   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19874   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19875   ins_encode %{
19876     int opcode = this->ideal_Opcode();
19877     int vlen = Matcher::vector_length(this, $src2);
19878     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19879   %}
19880   ins_pipe( pipe_slow );
19881 %}
19882 
19883 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19884   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19885             Matcher::vector_length(n->in(2)) == 64); // src2
19886   match(Set dst (MulReductionVI src1 src2));
19887   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19888   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19889   ins_encode %{
19890     int opcode = this->ideal_Opcode();
19891     int vlen = Matcher::vector_length(this, $src2);
19892     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19893   %}
19894   ins_pipe( pipe_slow );
19895 %}
19896 
19897 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
19899 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19900                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19901   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19902             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19903              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19904             Matcher::vector_length(n->in(2)) == 2);
19905   match(Set dst (MinReductionV src1 src2));
19906   match(Set dst (MaxReductionV src1 src2));
19907   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19908   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19909   ins_encode %{
19910     assert(UseAVX > 0, "sanity");
19911 
19912     int opcode = this->ideal_Opcode();
19913     int vlen = Matcher::vector_length(this, $src2);
19914     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19915                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19916   %}
19917   ins_pipe( pipe_slow );
19918 %}
19919 
19920 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19921                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19922   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19923             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19924              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19925             Matcher::vector_length(n->in(2)) >= 4);
19926   match(Set dst (MinReductionV src1 src2));
19927   match(Set dst (MaxReductionV src1 src2));
19928   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19929   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19930   ins_encode %{
19931     assert(UseAVX > 0, "sanity");
19932 
19933     int opcode = this->ideal_Opcode();
19934     int vlen = Matcher::vector_length(this, $src2);
19935     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19936                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19937   %}
19938   ins_pipe( pipe_slow );
19939 %}
19940 
19941 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19942                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19943   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19944             Matcher::vector_length(n->in(2)) == 2);
19945   match(Set dst (MinReductionV dst src));
19946   match(Set dst (MaxReductionV dst src));
19947   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19948   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19949   ins_encode %{
19950     assert(UseAVX > 0, "sanity");
19951 
19952     int opcode = this->ideal_Opcode();
19953     int vlen = Matcher::vector_length(this, $src);
19954     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19955                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19956   %}
19957   ins_pipe( pipe_slow );
19958 %}
19959 
19960 
19961 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19962                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19963   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19964             Matcher::vector_length(n->in(2)) >= 4);
19965   match(Set dst (MinReductionV dst src));
19966   match(Set dst (MaxReductionV dst src));
19967   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19968   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19969   ins_encode %{
19970     assert(UseAVX > 0, "sanity");
19971 
19972     int opcode = this->ideal_Opcode();
19973     int vlen = Matcher::vector_length(this, $src);
19974     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19975                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19976   %}
19977   ins_pipe( pipe_slow );
19978 %}
19979 
19980 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19981   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19982             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19983              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19984             Matcher::vector_length(n->in(2)) == 2);
19985   match(Set dst (MinReductionV src1 src2));
19986   match(Set dst (MaxReductionV src1 src2));
19987   effect(TEMP dst, TEMP xtmp1);
19988   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19989   ins_encode %{
19990     int opcode = this->ideal_Opcode();
19991     int vlen = Matcher::vector_length(this, $src2);
19992     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19993                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19994   %}
19995   ins_pipe( pipe_slow );
19996 %}
19997 
19998 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19999   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20000             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20001              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20002             Matcher::vector_length(n->in(2)) >= 4);
20003   match(Set dst (MinReductionV src1 src2));
20004   match(Set dst (MaxReductionV src1 src2));
20005   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20006   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20007   ins_encode %{
20008     int opcode = this->ideal_Opcode();
20009     int vlen = Matcher::vector_length(this, $src2);
20010     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20011                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20012   %}
20013   ins_pipe( pipe_slow );
20014 %}
20015 
20016 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20017   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20018             Matcher::vector_length(n->in(2)) == 2);
20019   match(Set dst (MinReductionV dst src));
20020   match(Set dst (MaxReductionV dst src));
20021   effect(TEMP dst, TEMP xtmp1);
20022   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20023   ins_encode %{
20024     int opcode = this->ideal_Opcode();
20025     int vlen = Matcher::vector_length(this, $src);
20026     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20027                          $xtmp1$$XMMRegister);
20028   %}
20029   ins_pipe( pipe_slow );
20030 %}
20031 
20032 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20033   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20034             Matcher::vector_length(n->in(2)) >= 4);
20035   match(Set dst (MinReductionV dst src));
20036   match(Set dst (MaxReductionV dst src));
20037   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20039   ins_encode %{
20040     int opcode = this->ideal_Opcode();
20041     int vlen = Matcher::vector_length(this, $src);
20042     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20043                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20044   %}
20045   ins_pipe( pipe_slow );
20046 %}
20047 
//--------------------Min/Max Double Reduction --------------------
20049 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20050                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20051   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20052             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20053              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20054             Matcher::vector_length(n->in(2)) == 2);
20055   match(Set dst (MinReductionV src1 src2));
20056   match(Set dst (MaxReductionV src1 src2));
20057   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20058   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20059   ins_encode %{
20060     assert(UseAVX > 0, "sanity");
20061 
20062     int opcode = this->ideal_Opcode();
20063     int vlen = Matcher::vector_length(this, $src2);
20064     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20065                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20066   %}
20067   ins_pipe( pipe_slow );
20068 %}
20069 
20070 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20071                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20072   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20073             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20074              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20075             Matcher::vector_length(n->in(2)) >= 4);
20076   match(Set dst (MinReductionV src1 src2));
20077   match(Set dst (MaxReductionV src1 src2));
20078   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20079   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20080   ins_encode %{
20081     assert(UseAVX > 0, "sanity");
20082 
20083     int opcode = this->ideal_Opcode();
20084     int vlen = Matcher::vector_length(this, $src2);
20085     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20086                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20087   %}
20088   ins_pipe( pipe_slow );
20089 %}
20090 
20091 
20092 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20093                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20094   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20095             Matcher::vector_length(n->in(2)) == 2);
20096   match(Set dst (MinReductionV dst src));
20097   match(Set dst (MaxReductionV dst src));
20098   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20099   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20100   ins_encode %{
20101     assert(UseAVX > 0, "sanity");
20102 
20103     int opcode = this->ideal_Opcode();
20104     int vlen = Matcher::vector_length(this, $src);
20105     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20106                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20107   %}
20108   ins_pipe( pipe_slow );
20109 %}
20110 
20111 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20112                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20113   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20114             Matcher::vector_length(n->in(2)) >= 4);
20115   match(Set dst (MinReductionV dst src));
20116   match(Set dst (MaxReductionV dst src));
20117   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20118   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20119   ins_encode %{
20120     assert(UseAVX > 0, "sanity");
20121 
20122     int opcode = this->ideal_Opcode();
20123     int vlen = Matcher::vector_length(this, $src);
20124     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20125                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20126   %}
20127   ins_pipe( pipe_slow );
20128 %}
20129 
20130 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20131   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20132             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20133              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20134             Matcher::vector_length(n->in(2)) == 2);
20135   match(Set dst (MinReductionV src1 src2));
20136   match(Set dst (MaxReductionV src1 src2));
20137   effect(TEMP dst, TEMP xtmp1);
20138   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20139   ins_encode %{
20140     int opcode = this->ideal_Opcode();
20141     int vlen = Matcher::vector_length(this, $src2);
20142     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20143                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20144   %}
20145   ins_pipe( pipe_slow );
20146 %}
20147 
20148 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20149   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20150             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20151              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20152             Matcher::vector_length(n->in(2)) >= 4);
20153   match(Set dst (MinReductionV src1 src2));
20154   match(Set dst (MaxReductionV src1 src2));
20155   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20156   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20157   ins_encode %{
20158     int opcode = this->ideal_Opcode();
20159     int vlen = Matcher::vector_length(this, $src2);
20160     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20161                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20162   %}
20163   ins_pipe( pipe_slow );
20164 %}
20165 
20166 
20167 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20168   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20169             Matcher::vector_length(n->in(2)) == 2);
20170   match(Set dst (MinReductionV dst src));
20171   match(Set dst (MaxReductionV dst src));
20172   effect(TEMP dst, TEMP xtmp1);
20173   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20174   ins_encode %{
20175     int opcode = this->ideal_Opcode();
20176     int vlen = Matcher::vector_length(this, $src);
20177     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20178                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20179   %}
20180   ins_pipe( pipe_slow );
20181 %}
20182 
20183 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20184   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20185             Matcher::vector_length(n->in(2)) >= 4);
20186   match(Set dst (MinReductionV dst src));
20187   match(Set dst (MaxReductionV dst src));
20188   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20189   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20190   ins_encode %{
20191     int opcode = this->ideal_Opcode();
20192     int vlen = Matcher::vector_length(this, $src);
20193     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20194                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20195   %}
20196   ins_pipe( pipe_slow );
20197 %}
20198 
20199 // ====================VECTOR ARITHMETIC=======================================
20200 
20201 // --------------------------------- ADD --------------------------------------
20202 
20203 // Bytes vector add
20204 instruct vaddB(vec dst, vec src) %{
20205   predicate(UseAVX == 0);
20206   match(Set dst (AddVB dst src));
20207   format %{ "paddb   $dst,$src\t! add packedB" %}
20208   ins_encode %{
20209     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20210   %}
20211   ins_pipe( pipe_slow );
20212 %}
20213 
20214 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20215   predicate(UseAVX > 0);
20216   match(Set dst (AddVB src1 src2));
20217   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20218   ins_encode %{
20219     int vlen_enc = vector_length_encoding(this);
20220     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20221   %}
20222   ins_pipe( pipe_slow );
20223 %}
20224 
20225 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20226   predicate((UseAVX > 0) &&
20227             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20228   match(Set dst (AddVB src (LoadVector mem)));
20229   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20230   ins_encode %{
20231     int vlen_enc = vector_length_encoding(this);
20232     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20233   %}
20234   ins_pipe( pipe_slow );
20235 %}
20236 
20237 // Shorts/Chars vector add
20238 instruct vaddS(vec dst, vec src) %{
20239   predicate(UseAVX == 0);
20240   match(Set dst (AddVS dst src));
20241   format %{ "paddw   $dst,$src\t! add packedS" %}
20242   ins_encode %{
20243     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20244   %}
20245   ins_pipe( pipe_slow );
20246 %}
20247 
20248 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20249   predicate(UseAVX > 0);
20250   match(Set dst (AddVS src1 src2));
20251   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20252   ins_encode %{
20253     int vlen_enc = vector_length_encoding(this);
20254     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20255   %}
20256   ins_pipe( pipe_slow );
20257 %}
20258 
20259 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20260   predicate((UseAVX > 0) &&
20261             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20262   match(Set dst (AddVS src (LoadVector mem)));
20263   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20264   ins_encode %{
20265     int vlen_enc = vector_length_encoding(this);
20266     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20267   %}
20268   ins_pipe( pipe_slow );
20269 %}
20270 
20271 // Integers vector add
20272 instruct vaddI(vec dst, vec src) %{
20273   predicate(UseAVX == 0);
20274   match(Set dst (AddVI dst src));
20275   format %{ "paddd   $dst,$src\t! add packedI" %}
20276   ins_encode %{
20277     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20278   %}
20279   ins_pipe( pipe_slow );
20280 %}
20281 
20282 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20283   predicate(UseAVX > 0);
20284   match(Set dst (AddVI src1 src2));
20285   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20286   ins_encode %{
20287     int vlen_enc = vector_length_encoding(this);
20288     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20289   %}
20290   ins_pipe( pipe_slow );
20291 %}
20292 
20293 
20294 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20295   predicate((UseAVX > 0) &&
20296             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20297   match(Set dst (AddVI src (LoadVector mem)));
20298   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20299   ins_encode %{
20300     int vlen_enc = vector_length_encoding(this);
20301     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20302   %}
20303   ins_pipe( pipe_slow );
20304 %}
20305 
20306 // Longs vector add
20307 instruct vaddL(vec dst, vec src) %{
20308   predicate(UseAVX == 0);
20309   match(Set dst (AddVL dst src));
20310   format %{ "paddq   $dst,$src\t! add packedL" %}
20311   ins_encode %{
20312     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20313   %}
20314   ins_pipe( pipe_slow );
20315 %}
20316 
20317 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20318   predicate(UseAVX > 0);
20319   match(Set dst (AddVL src1 src2));
20320   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20321   ins_encode %{
20322     int vlen_enc = vector_length_encoding(this);
20323     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20324   %}
20325   ins_pipe( pipe_slow );
20326 %}
20327 
20328 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20329   predicate((UseAVX > 0) &&
20330             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20331   match(Set dst (AddVL src (LoadVector mem)));
20332   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20333   ins_encode %{
20334     int vlen_enc = vector_length_encoding(this);
20335     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20336   %}
20337   ins_pipe( pipe_slow );
20338 %}
20339 
20340 // Floats vector add
20341 instruct vaddF(vec dst, vec src) %{
20342   predicate(UseAVX == 0);
20343   match(Set dst (AddVF dst src));
20344   format %{ "addps   $dst,$src\t! add packedF" %}
20345   ins_encode %{
20346     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20347   %}
20348   ins_pipe( pipe_slow );
20349 %}
20350 
20351 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20352   predicate(UseAVX > 0);
20353   match(Set dst (AddVF src1 src2));
20354   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20355   ins_encode %{
20356     int vlen_enc = vector_length_encoding(this);
20357     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20358   %}
20359   ins_pipe( pipe_slow );
20360 %}
20361 
20362 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20363   predicate((UseAVX > 0) &&
20364             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20365   match(Set dst (AddVF src (LoadVector mem)));
20366   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20367   ins_encode %{
20368     int vlen_enc = vector_length_encoding(this);
20369     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20370   %}
20371   ins_pipe( pipe_slow );
20372 %}
20373 
20374 // Doubles vector add
20375 instruct vaddD(vec dst, vec src) %{
20376   predicate(UseAVX == 0);
20377   match(Set dst (AddVD dst src));
20378   format %{ "addpd   $dst,$src\t! add packedD" %}
20379   ins_encode %{
20380     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20381   %}
20382   ins_pipe( pipe_slow );
20383 %}
20384 
20385 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20386   predicate(UseAVX > 0);
20387   match(Set dst (AddVD src1 src2));
20388   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20389   ins_encode %{
20390     int vlen_enc = vector_length_encoding(this);
20391     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20392   %}
20393   ins_pipe( pipe_slow );
20394 %}
20395 
20396 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20397   predicate((UseAVX > 0) &&
20398             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20399   match(Set dst (AddVD src (LoadVector mem)));
20400   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20401   ins_encode %{
20402     int vlen_enc = vector_length_encoding(this);
20403     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20404   %}
20405   ins_pipe( pipe_slow );
20406 %}
20407 
20408 // --------------------------------- SUB --------------------------------------
20409 
20410 // Bytes vector sub
20411 instruct vsubB(vec dst, vec src) %{
20412   predicate(UseAVX == 0);
20413   match(Set dst (SubVB dst src));
20414   format %{ "psubb   $dst,$src\t! sub packedB" %}
20415   ins_encode %{
20416     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20417   %}
20418   ins_pipe( pipe_slow );
20419 %}
20420 
20421 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20422   predicate(UseAVX > 0);
20423   match(Set dst (SubVB src1 src2));
20424   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20425   ins_encode %{
20426     int vlen_enc = vector_length_encoding(this);
20427     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20428   %}
20429   ins_pipe( pipe_slow );
20430 %}
20431 
20432 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20433   predicate((UseAVX > 0) &&
20434             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20435   match(Set dst (SubVB src (LoadVector mem)));
20436   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20437   ins_encode %{
20438     int vlen_enc = vector_length_encoding(this);
20439     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20440   %}
20441   ins_pipe( pipe_slow );
20442 %}
20443 
20444 // Shorts/Chars vector sub
20445 instruct vsubS(vec dst, vec src) %{
20446   predicate(UseAVX == 0);
20447   match(Set dst (SubVS dst src));
20448   format %{ "psubw   $dst,$src\t! sub packedS" %}
20449   ins_encode %{
20450     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20451   %}
20452   ins_pipe( pipe_slow );
20453 %}
20454 
20455 
20456 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20457   predicate(UseAVX > 0);
20458   match(Set dst (SubVS src1 src2));
20459   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20460   ins_encode %{
20461     int vlen_enc = vector_length_encoding(this);
20462     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20463   %}
20464   ins_pipe( pipe_slow );
20465 %}
20466 
20467 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20468   predicate((UseAVX > 0) &&
20469             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20470   match(Set dst (SubVS src (LoadVector mem)));
20471   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20472   ins_encode %{
20473     int vlen_enc = vector_length_encoding(this);
20474     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20475   %}
20476   ins_pipe( pipe_slow );
20477 %}
20478 
20479 // Integers vector sub
20480 instruct vsubI(vec dst, vec src) %{
20481   predicate(UseAVX == 0);
20482   match(Set dst (SubVI dst src));
20483   format %{ "psubd   $dst,$src\t! sub packedI" %}
20484   ins_encode %{
20485     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20486   %}
20487   ins_pipe( pipe_slow );
20488 %}
20489 
20490 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20491   predicate(UseAVX > 0);
20492   match(Set dst (SubVI src1 src2));
20493   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20494   ins_encode %{
20495     int vlen_enc = vector_length_encoding(this);
20496     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20497   %}
20498   ins_pipe( pipe_slow );
20499 %}
20500 
20501 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20502   predicate((UseAVX > 0) &&
20503             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20504   match(Set dst (SubVI src (LoadVector mem)));
20505   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20506   ins_encode %{
20507     int vlen_enc = vector_length_encoding(this);
20508     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20509   %}
20510   ins_pipe( pipe_slow );
20511 %}
20512 
20513 // Longs vector sub
20514 instruct vsubL(vec dst, vec src) %{
20515   predicate(UseAVX == 0);
20516   match(Set dst (SubVL dst src));
20517   format %{ "psubq   $dst,$src\t! sub packedL" %}
20518   ins_encode %{
20519     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20520   %}
20521   ins_pipe( pipe_slow );
20522 %}
20523 
20524 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20525   predicate(UseAVX > 0);
20526   match(Set dst (SubVL src1 src2));
20527   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20528   ins_encode %{
20529     int vlen_enc = vector_length_encoding(this);
20530     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20531   %}
20532   ins_pipe( pipe_slow );
20533 %}
20534 
20535 
20536 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20537   predicate((UseAVX > 0) &&
20538             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20539   match(Set dst (SubVL src (LoadVector mem)));
20540   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20541   ins_encode %{
20542     int vlen_enc = vector_length_encoding(this);
20543     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20544   %}
20545   ins_pipe( pipe_slow );
20546 %}
20547 
20548 // Floats vector sub
20549 instruct vsubF(vec dst, vec src) %{
20550   predicate(UseAVX == 0);
20551   match(Set dst (SubVF dst src));
20552   format %{ "subps   $dst,$src\t! sub packedF" %}
20553   ins_encode %{
20554     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20555   %}
20556   ins_pipe( pipe_slow );
20557 %}
20558 
20559 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20560   predicate(UseAVX > 0);
20561   match(Set dst (SubVF src1 src2));
20562   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20563   ins_encode %{
20564     int vlen_enc = vector_length_encoding(this);
20565     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20566   %}
20567   ins_pipe( pipe_slow );
20568 %}
20569 
20570 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20571   predicate((UseAVX > 0) &&
20572             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20573   match(Set dst (SubVF src (LoadVector mem)));
20574   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20575   ins_encode %{
20576     int vlen_enc = vector_length_encoding(this);
20577     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20578   %}
20579   ins_pipe( pipe_slow );
20580 %}
20581 
20582 // Doubles vector sub
20583 instruct vsubD(vec dst, vec src) %{
20584   predicate(UseAVX == 0);
20585   match(Set dst (SubVD dst src));
20586   format %{ "subpd   $dst,$src\t! sub packedD" %}
20587   ins_encode %{
20588     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20589   %}
20590   ins_pipe( pipe_slow );
20591 %}
20592 
20593 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20594   predicate(UseAVX > 0);
20595   match(Set dst (SubVD src1 src2));
20596   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20597   ins_encode %{
20598     int vlen_enc = vector_length_encoding(this);
20599     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20600   %}
20601   ins_pipe( pipe_slow );
20602 %}
20603 
20604 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20605   predicate((UseAVX > 0) &&
20606             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20607   match(Set dst (SubVD src (LoadVector mem)));
20608   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20609   ins_encode %{
20610     int vlen_enc = vector_length_encoding(this);
20611     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20612   %}
20613   ins_pipe( pipe_slow );
20614 %}
20615 
20616 // --------------------------------- MUL --------------------------------------
20617 
20618 // Byte vector mul
20619 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20620   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20621   match(Set dst (MulVB src1 src2));
20622   effect(TEMP dst, TEMP xtmp);
20623   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20624   ins_encode %{
20625     assert(UseSSE > 3, "required");
    // Sign-extend the bytes to 16-bit words, multiply in word lanes,
    // then keep only the low byte of each product and re-pack.
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ psrlw($dst$$XMMRegister, 8);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20632   %}
20633   ins_pipe( pipe_slow );
20634 %}
20635 
20636 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20637   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20638   match(Set dst (MulVB src1 src2));
20639   effect(TEMP dst, TEMP xtmp);
20640   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20641   ins_encode %{
20642     assert(UseSSE > 3, "required");
20643     // Odd-index elements
20644     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20645     __ psrlw($dst$$XMMRegister, 8);
20646     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20647     __ psrlw($xtmp$$XMMRegister, 8);
20648     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20649     __ psllw($dst$$XMMRegister, 8);
20650     // Even-index elements
20651     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20652     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20653     __ psllw($xtmp$$XMMRegister, 8);
20654     __ psrlw($xtmp$$XMMRegister, 8);
20655     // Combine
20656     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20657   %}
20658   ins_pipe( pipe_slow );
20659 %}
20660 
20661 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20662   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20663   match(Set dst (MulVB src1 src2));
20664   effect(TEMP xtmp1, TEMP xtmp2);
20665   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20666   ins_encode %{
20667     int vlen_enc = vector_length_encoding(this);
20668     // Odd-index elements
20669     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20670     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20671     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20672     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20673     // Even-index elements
20674     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20675     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20676     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20677     // Combine
20678     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20679   %}
20680   ins_pipe( pipe_slow );
20681 %}
20682 
20683 // Shorts/Chars vector mul
20684 instruct vmulS(vec dst, vec src) %{
20685   predicate(UseAVX == 0);
20686   match(Set dst (MulVS dst src));
20687   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20688   ins_encode %{
20689     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20690   %}
20691   ins_pipe( pipe_slow );
20692 %}
20693 
20694 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20695   predicate(UseAVX > 0);
20696   match(Set dst (MulVS src1 src2));
20697   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20698   ins_encode %{
20699     int vlen_enc = vector_length_encoding(this);
20700     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20701   %}
20702   ins_pipe( pipe_slow );
20703 %}
20704 
20705 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20706   predicate((UseAVX > 0) &&
20707             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20708   match(Set dst (MulVS src (LoadVector mem)));
20709   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20710   ins_encode %{
20711     int vlen_enc = vector_length_encoding(this);
20712     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20713   %}
20714   ins_pipe( pipe_slow );
20715 %}
20716 
20717 // Integers vector mul
20718 instruct vmulI(vec dst, vec src) %{
20719   predicate(UseAVX == 0);
20720   match(Set dst (MulVI dst src));
20721   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20722   ins_encode %{
20723     assert(UseSSE > 3, "required");
20724     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20725   %}
20726   ins_pipe( pipe_slow );
20727 %}
20728 
20729 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20730   predicate(UseAVX > 0);
20731   match(Set dst (MulVI src1 src2));
20732   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20733   ins_encode %{
20734     int vlen_enc = vector_length_encoding(this);
20735     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20736   %}
20737   ins_pipe( pipe_slow );
20738 %}
20739 
20740 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20741   predicate((UseAVX > 0) &&
20742             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20743   match(Set dst (MulVI src (LoadVector mem)));
20744   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20745   ins_encode %{
20746     int vlen_enc = vector_length_encoding(this);
20747     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20748   %}
20749   ins_pipe( pipe_slow );
20750 %}
20751 
20752 // Longs vector mul
20753 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20754   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20755              VM_Version::supports_avx512dq()) ||
20756             VM_Version::supports_avx512vldq());
20757   match(Set dst (MulVL src1 src2));
20758   ins_cost(500);
20759   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20760   ins_encode %{
20761     assert(UseAVX > 2, "required");
20762     int vlen_enc = vector_length_encoding(this);
20763     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20764   %}
20765   ins_pipe( pipe_slow );
20766 %}
20767 
20768 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20769   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20770              VM_Version::supports_avx512dq()) ||
20771             (Matcher::vector_length_in_bytes(n) > 8 &&
20772              VM_Version::supports_avx512vldq()));
20773   match(Set dst (MulVL src (LoadVector mem)));
20774   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20775   ins_cost(500);
20776   ins_encode %{
20777     assert(UseAVX > 2, "required");
20778     int vlen_enc = vector_length_encoding(this);
20779     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20780   %}
20781   ins_pipe( pipe_slow );
20782 %}
20783 
20784 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20785   predicate(UseAVX == 0);
20786   match(Set dst (MulVL src1 src2));
20787   ins_cost(500);
20788   effect(TEMP dst, TEMP xtmp);
20789   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20790   ins_encode %{
20791     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are needed
20793     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20794     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20795     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20796     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20797     __ psllq($dst$$XMMRegister, 32);
20798     // Get the lo-lo products
20799     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20800     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20801     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20802   %}
20803   ins_pipe( pipe_slow );
20804 %}
20805 
20806 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20807   predicate(UseAVX > 0 &&
20808             ((Matcher::vector_length_in_bytes(n) == 64 &&
20809               !VM_Version::supports_avx512dq()) ||
20810              (Matcher::vector_length_in_bytes(n) < 64 &&
20811               !VM_Version::supports_avx512vldq())));
20812   match(Set dst (MulVL src1 src2));
20813   effect(TEMP xtmp1, TEMP xtmp2);
20814   ins_cost(500);
20815   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20816   ins_encode %{
20817     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are needed
20819     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20820     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20821     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20822     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20823     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20824     // Get the lo-lo products
20825     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20826     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20827   %}
20828   ins_pipe( pipe_slow );
20829 %}
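
// The two MulVL fallbacks above synthesize a 64x64->64 multiply from 32-bit
// operations. Writing each lane as a = aH*2^32 + aL and b = bH*2^32 + bL:
//
//   a * b mod 2^64 == ((aL*bH + aH*bL) << 32) + aL*bL
//
// pshufd(0xB1) swaps the dword halves of each qword so a single 32-bit
// multiply yields both cross products, which are summed and shifted into the
// high half; vpmuludq supplies the full 64-bit aL*bL term. The aH*bH term
// only affects bits >= 64 and is dropped.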
20830 
20831 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20832   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20833   match(Set dst (MulVL src1 src2));
20834   ins_cost(100);
20835   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20836   ins_encode %{
20837     int vlen_enc = vector_length_encoding(this);
20838     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20839   %}
20840   ins_pipe( pipe_slow );
20841 %}
20842 
20843 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20844   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20845   match(Set dst (MulVL src1 src2));
20846   ins_cost(100);
20847   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20848   ins_encode %{
20849     int vlen_enc = vector_length_encoding(this);
20850     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20851   %}
20852   ins_pipe( pipe_slow );
20853 %}
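
// When C2 can prove both MulVL inputs are zero- or sign-extended 32-bit
// values (has_uint_inputs / has_int_inputs), the product fits a single
// widening multiply, hence ins_cost(100) above versus 500 for the generic
// sequences. Scalar model of the per-lane semantics (illustrative):
//
//   uint64_t pmuludq_lane(uint64_t a, uint64_t b) {  // vpmuludq
//     return (uint64_t)(uint32_t)a * (uint32_t)b;    // unsigned 32x32->64
//   }
//   int64_t pmuldq_lane(int64_t a, int64_t b) {      // vpmuldq
//     return (int64_t)(int32_t)a * (int32_t)b;       // signed 32x32->64
//   }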
20854 
20855 // Floats vector mul
20856 instruct vmulF(vec dst, vec src) %{
20857   predicate(UseAVX == 0);
20858   match(Set dst (MulVF dst src));
20859   format %{ "mulps   $dst,$src\t! mul packedF" %}
20860   ins_encode %{
20861     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20862   %}
20863   ins_pipe( pipe_slow );
20864 %}
20865 
20866 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20867   predicate(UseAVX > 0);
20868   match(Set dst (MulVF src1 src2));
20869   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20870   ins_encode %{
20871     int vlen_enc = vector_length_encoding(this);
20872     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20873   %}
20874   ins_pipe( pipe_slow );
20875 %}
20876 
20877 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20878   predicate((UseAVX > 0) &&
20879             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20880   match(Set dst (MulVF src (LoadVector mem)));
20881   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20882   ins_encode %{
20883     int vlen_enc = vector_length_encoding(this);
20884     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20885   %}
20886   ins_pipe( pipe_slow );
20887 %}
20888 
20889 // Doubles vector mul
20890 instruct vmulD(vec dst, vec src) %{
20891   predicate(UseAVX == 0);
20892   match(Set dst (MulVD dst src));
20893   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20894   ins_encode %{
20895     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20896   %}
20897   ins_pipe( pipe_slow );
20898 %}
20899 
20900 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20901   predicate(UseAVX > 0);
20902   match(Set dst (MulVD src1 src2));
20903   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20904   ins_encode %{
20905     int vlen_enc = vector_length_encoding(this);
20906     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20907   %}
20908   ins_pipe( pipe_slow );
20909 %}
20910 
20911 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20912   predicate((UseAVX > 0) &&
20913             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20914   match(Set dst (MulVD src (LoadVector mem)));
20915   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20916   ins_encode %{
20917     int vlen_enc = vector_length_encoding(this);
20918     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20919   %}
20920   ins_pipe( pipe_slow );
20921 %}
20922 
20923 // --------------------------------- DIV --------------------------------------
20924 
20925 // Floats vector div
20926 instruct vdivF(vec dst, vec src) %{
20927   predicate(UseAVX == 0);
20928   match(Set dst (DivVF dst src));
20929   format %{ "divps   $dst,$src\t! div packedF" %}
20930   ins_encode %{
20931     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20932   %}
20933   ins_pipe( pipe_slow );
20934 %}
20935 
20936 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20937   predicate(UseAVX > 0);
20938   match(Set dst (DivVF src1 src2));
20939   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20940   ins_encode %{
20941     int vlen_enc = vector_length_encoding(this);
20942     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20943   %}
20944   ins_pipe( pipe_slow );
20945 %}
20946 
20947 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20948   predicate((UseAVX > 0) &&
20949             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20950   match(Set dst (DivVF src (LoadVector mem)));
20951   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20952   ins_encode %{
20953     int vlen_enc = vector_length_encoding(this);
20954     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20955   %}
20956   ins_pipe( pipe_slow );
20957 %}
20958 
20959 // Doubles vector div
20960 instruct vdivD(vec dst, vec src) %{
20961   predicate(UseAVX == 0);
20962   match(Set dst (DivVD dst src));
20963   format %{ "divpd   $dst,$src\t! div packedD" %}
20964   ins_encode %{
20965     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20966   %}
20967   ins_pipe( pipe_slow );
20968 %}
20969 
20970 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20971   predicate(UseAVX > 0);
20972   match(Set dst (DivVD src1 src2));
20973   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20974   ins_encode %{
20975     int vlen_enc = vector_length_encoding(this);
20976     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20977   %}
20978   ins_pipe( pipe_slow );
20979 %}
20980 
20981 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20982   predicate((UseAVX > 0) &&
20983             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20984   match(Set dst (DivVD src (LoadVector mem)));
20985   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20986   ins_encode %{
20987     int vlen_enc = vector_length_encoding(this);
20988     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20989   %}
20990   ins_pipe( pipe_slow );
20991 %}
20992 
20993 // ------------------------------ MinMax ---------------------------------------
20994 
20995 // Byte, Short, Int vector Min/Max
20996 instruct minmax_reg_sse(vec dst, vec src) %{
20997   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20998             UseAVX == 0);
20999   match(Set dst (MinV dst src));
21000   match(Set dst (MaxV dst src));
21001   format %{ "vector_minmax  $dst,$src\t!  " %}
21002   ins_encode %{
21003     assert(UseSSE >= 4, "required");
21004 
21005     int opcode = this->ideal_Opcode();
21006     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21007     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21008   %}
21009   ins_pipe( pipe_slow );
21010 %}
21011 
21012 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21013   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21014             UseAVX > 0);
21015   match(Set dst (MinV src1 src2));
21016   match(Set dst (MaxV src1 src2));
21017   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
21018   ins_encode %{
21019     int opcode = this->ideal_Opcode();
21020     int vlen_enc = vector_length_encoding(this);
21021     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21022 
21023     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21024   %}
21025   ins_pipe( pipe_slow );
21026 %}
21027 
21028 // Long vector Min/Max
21029 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21030   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21031             UseAVX == 0);
21032   match(Set dst (MinV dst src));
21033   match(Set dst (MaxV src dst));
21034   effect(TEMP dst, TEMP tmp);
21035   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
21036   ins_encode %{
21037     assert(UseSSE >= 4, "required");
21038 
21039     int opcode = this->ideal_Opcode();
21040     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21041     assert(elem_bt == T_LONG, "sanity");
21042 
21043     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21044   %}
21045   ins_pipe( pipe_slow );
21046 %}
21047 
21048 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21049   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21050             UseAVX > 0 && !VM_Version::supports_avx512vl());
21051   match(Set dst (MinV src1 src2));
21052   match(Set dst (MaxV src1 src2));
21053   effect(TEMP dst);
21054   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
21055   ins_encode %{
21056     int vlen_enc = vector_length_encoding(this);
21057     int opcode = this->ideal_Opcode();
21058     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21059     assert(elem_bt == T_LONG, "sanity");
21060 
21061     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21062   %}
21063   ins_pipe( pipe_slow );
21064 %}
21065 
21066 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21067   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21068             Matcher::vector_element_basic_type(n) == T_LONG);
21069   match(Set dst (MinV src1 src2));
21070   match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
21072   ins_encode %{
21073     assert(UseAVX > 2, "required");
21074 
21075     int vlen_enc = vector_length_encoding(this);
21076     int opcode = this->ideal_Opcode();
21077     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21078     assert(elem_bt == T_LONG, "sanity");
21079 
21080     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21081   %}
21082   ins_pipe( pipe_slow );
21083 %}
21084 
21085 // Float/Double vector Min/Max
21086 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21087   predicate(VM_Version::supports_avx10_2() &&
21088             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21089   match(Set dst (MinV a b));
21090   match(Set dst (MaxV a b));
21091   format %{ "vector_minmaxFP  $dst, $a, $b" %}
21092   ins_encode %{
21093     int vlen_enc = vector_length_encoding(this);
21094     int opcode = this->ideal_Opcode();
21095     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21096     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21097   %}
21098   ins_pipe( pipe_slow );
21099 %}
21100 
21102 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21103   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21104             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21105             UseAVX > 0);
21106   match(Set dst (MinV a b));
21107   match(Set dst (MaxV a b));
21108   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21109   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21110   ins_encode %{
21111     assert(UseAVX > 0, "required");
21112 
21113     int opcode = this->ideal_Opcode();
21114     int vlen_enc = vector_length_encoding(this);
21115     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21116 
21117     __ vminmax_fp(opcode, elem_bt,
21118                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21119                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21120   %}
21121   ins_pipe( pipe_slow );
21122 %}
21123 
21124 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21125   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21126             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21127   match(Set dst (MinV a b));
21128   match(Set dst (MaxV a b));
21129   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21130   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21131   ins_encode %{
21132     assert(UseAVX > 2, "required");
21133 
21134     int opcode = this->ideal_Opcode();
21135     int vlen_enc = vector_length_encoding(this);
21136     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21137 
21138     __ evminmax_fp(opcode, elem_bt,
21139                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21140                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21141   %}
21142   ins_pipe( pipe_slow );
21143 %}
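
// Note: raw x86 minps/maxps do not implement Java semantics directly; they
// return the second source operand when the inputs compare equal (losing the
// -0.0 < +0.0 ordering) and when either input is NaN. That is why the
// pre-AVX10.2 rules above need temp vectors or a mask register. Scalar model
// of the required behavior (illustrative; signbit as in <math.h>):
//
//   float java_min(float a, float b) {
//     if (a != a) return a;                                   // NaN propagates
//     if (b != b) return b;
//     if (a == 0.0f && b == 0.0f) return signbit(a) ? a : b;  // -0.0f < +0.0f
//     return a < b ? a : b;
//   }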
21144 
21145 // ------------------------------ Unsigned vector Min/Max ----------------------
21146 
21147 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21148   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21149   match(Set dst (UMinV a b));
21150   match(Set dst (UMaxV a b));
21151   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21152   ins_encode %{
21153     int opcode = this->ideal_Opcode();
21154     int vlen_enc = vector_length_encoding(this);
21155     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21156     assert(is_integral_type(elem_bt), "");
21157     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21158   %}
21159   ins_pipe( pipe_slow );
21160 %}
21161 
21162 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21163   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21164   match(Set dst (UMinV a (LoadVector b)));
21165   match(Set dst (UMaxV a (LoadVector b)));
21166   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21167   ins_encode %{
21168     int opcode = this->ideal_Opcode();
21169     int vlen_enc = vector_length_encoding(this);
21170     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21171     assert(is_integral_type(elem_bt), "");
21172     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21173   %}
21174   ins_pipe( pipe_slow );
21175 %}
21176 
21177 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21178   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21179   match(Set dst (UMinV a b));
21180   match(Set dst (UMaxV a b));
21181   effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1, $xtmp2 as TEMP" %}
21183   ins_encode %{
21184     int opcode = this->ideal_Opcode();
21185     int vlen_enc = vector_length_encoding(this);
21186     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21187   %}
21188   ins_pipe( pipe_slow );
21189 %}
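
// Without AVX-512VL there is no packed unsigned 64-bit min/max, so the rule
// above emulates it. A standard identity (which the macro assembler is free
// to use) flips the sign bits so an unsigned compare becomes a signed one;
// scalar sketch (illustrative, helper name is ours):
//
//   uint64_t umin64(uint64_t a, uint64_t b) {
//     const uint64_t bias = 0x8000000000000000ULL;
//     return ((int64_t)(a ^ bias) < (int64_t)(b ^ bias)) ? a : b;
//   }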
21190 
21191 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21192   match(Set dst (UMinV (Binary dst src2) mask));
21193   match(Set dst (UMaxV (Binary dst src2) mask));
21194   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21195   ins_encode %{
21196     int vlen_enc = vector_length_encoding(this);
21197     BasicType bt = Matcher::vector_element_basic_type(this);
21198     int opc = this->ideal_Opcode();
21199     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21200                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21201   %}
21202   ins_pipe( pipe_slow );
21203 %}
21204 
21205 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21206   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21207   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21208   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21209   ins_encode %{
21210     int vlen_enc = vector_length_encoding(this);
21211     BasicType bt = Matcher::vector_element_basic_type(this);
21212     int opc = this->ideal_Opcode();
21213     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21214                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21215   %}
21216   ins_pipe( pipe_slow );
21217 %}
21218 
21219 // --------------------------------- Signum/CopySign ---------------------------
21220 
21221 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21222   match(Set dst (SignumF dst (Binary zero one)));
21223   effect(KILL cr);
21224   format %{ "signumF $dst, $dst" %}
21225   ins_encode %{
21226     int opcode = this->ideal_Opcode();
21227     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21228   %}
21229   ins_pipe( pipe_slow );
21230 %}
21231 
21232 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21233   match(Set dst (SignumD dst (Binary zero one)));
21234   effect(KILL cr);
21235   format %{ "signumD $dst, $dst" %}
21236   ins_encode %{
21237     int opcode = this->ideal_Opcode();
21238     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21239   %}
21240   ins_pipe( pipe_slow );
21241 %}
21242 
21243 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21244   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21245   match(Set dst (SignumVF src (Binary zero one)));
21246   match(Set dst (SignumVD src (Binary zero one)));
21247   effect(TEMP dst, TEMP xtmp1);
21248   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21249   ins_encode %{
21250     int opcode = this->ideal_Opcode();
21251     int vec_enc = vector_length_encoding(this);
21252     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21253                          $xtmp1$$XMMRegister, vec_enc);
21254   %}
21255   ins_pipe( pipe_slow );
21256 %}
21257 
21258 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21259   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21260   match(Set dst (SignumVF src (Binary zero one)));
21261   match(Set dst (SignumVD src (Binary zero one)));
21262   effect(TEMP dst, TEMP ktmp1);
21263   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21264   ins_encode %{
21265     int opcode = this->ideal_Opcode();
21266     int vec_enc = vector_length_encoding(this);
21267     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21268                           $ktmp1$$KRegister, vec_enc);
21269   %}
21270   ins_pipe( pipe_slow );
21271 %}
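
// Vector Math.signum follows the Java library spec: NaN and +-0.0 pass
// through unchanged, everything else maps to +-1.0 (blended from the $one
// operand). Scalar model (illustrative):
//
//   double signum(double d) {
//     if (d != d || d == 0.0) return d;  // NaN and +-0.0 are returned as-is
//     return d > 0.0 ? 1.0 : -1.0;
//   }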
21272 
21273 // ---------------------------------------
21274 // For copySign use 0xE4 as writemask for vpternlog
21275 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21276 // C (xmm2) is set to 0x7FFFFFFF
21277 // Wherever xmm2 is 0, we want to pick from B (sign)
21278 // Wherever xmm2 is 1, we want to pick from A (src)
21279 //
21280 // A B C Result
21281 // 0 0 0 0
21282 // 0 0 1 0
21283 // 0 1 0 1
21284 // 0 1 1 0
21285 // 1 0 0 0
21286 // 1 0 1 1
21287 // 1 1 0 1
21288 // 1 1 1 1
21289 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
21291 // ---------------------------------------
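
// A bitwise scalar equivalent of that vpternlog (illustrative; helper name
// is ours):
//
//   uint32_t copy_sign_bits(uint32_t mag, uint32_t sign) {
//     const uint32_t C = 0x7FFFFFFF;     // magnitude mask in xmm2
//     return (mag & C) | (sign & ~C);    // == vpternlog with imm8 0xE4
//   }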
21292 
21293 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21294   match(Set dst (CopySignF dst src));
21295   effect(TEMP tmp1, TEMP tmp2);
21296   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21297   ins_encode %{
21298     __ movl($tmp2$$Register, 0x7FFFFFFF);
21299     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21300     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21301   %}
21302   ins_pipe( pipe_slow );
21303 %}
21304 
21305 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21306   match(Set dst (CopySignD dst (Binary src zero)));
21307   ins_cost(100);
21308   effect(TEMP tmp1, TEMP tmp2);
21309   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21310   ins_encode %{
21311     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21312     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21313     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21314   %}
21315   ins_pipe( pipe_slow );
21316 %}
21317 
21318 //----------------------------- CompressBits/ExpandBits ------------------------
21319 
21320 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21321   predicate(n->bottom_type()->isa_int());
21322   match(Set dst (CompressBits src mask));
21323   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21324   ins_encode %{
21325     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21326   %}
21327   ins_pipe( pipe_slow );
21328 %}
21329 
21330 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21331   predicate(n->bottom_type()->isa_int());
21332   match(Set dst (ExpandBits src mask));
21333   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21334   ins_encode %{
21335     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21336   %}
21337   ins_pipe( pipe_slow );
21338 %}
21339 
21340 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21341   predicate(n->bottom_type()->isa_int());
21342   match(Set dst (CompressBits src (LoadI mask)));
21343   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21344   ins_encode %{
21345     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21346   %}
21347   ins_pipe( pipe_slow );
21348 %}
21349 
21350 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21351   predicate(n->bottom_type()->isa_int());
21352   match(Set dst (ExpandBits src (LoadI mask)));
21353   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21354   ins_encode %{
21355     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21356   %}
21357   ins_pipe( pipe_slow );
21358 %}
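
// BMI2 pext/pdep semantics, for reference (values illustrative):
//
//   pext(src, mask): gather the src bits selected by mask into the low bits
//     pext(0b11010110, 0b11110000) == 0b1101
//   pdep(src, mask): scatter the low src bits into the positions set in mask
//     pdep(0b00001101, 0b11110000) == 0b11010000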
21359 
21360 // --------------------------------- Sqrt --------------------------------------
21361 
21362 instruct vsqrtF_reg(vec dst, vec src) %{
21363   match(Set dst (SqrtVF src));
21364   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21365   ins_encode %{
21366     assert(UseAVX > 0, "required");
21367     int vlen_enc = vector_length_encoding(this);
21368     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21369   %}
21370   ins_pipe( pipe_slow );
21371 %}
21372 
21373 instruct vsqrtF_mem(vec dst, memory mem) %{
21374   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21375   match(Set dst (SqrtVF (LoadVector mem)));
21376   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21377   ins_encode %{
21378     assert(UseAVX > 0, "required");
21379     int vlen_enc = vector_length_encoding(this);
21380     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21381   %}
21382   ins_pipe( pipe_slow );
21383 %}
21384 
// Doubles vector sqrt
21386 instruct vsqrtD_reg(vec dst, vec src) %{
21387   match(Set dst (SqrtVD src));
21388   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21389   ins_encode %{
21390     assert(UseAVX > 0, "required");
21391     int vlen_enc = vector_length_encoding(this);
21392     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21393   %}
21394   ins_pipe( pipe_slow );
21395 %}
21396 
21397 instruct vsqrtD_mem(vec dst, memory mem) %{
21398   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21399   match(Set dst (SqrtVD (LoadVector mem)));
21400   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21401   ins_encode %{
21402     assert(UseAVX > 0, "required");
21403     int vlen_enc = vector_length_encoding(this);
21404     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21405   %}
21406   ins_pipe( pipe_slow );
21407 %}
21408 
21409 // ------------------------------ Shift ---------------------------------------
21410 
21411 // Left and right shift count vectors are the same on x86
21412 // (only lowest bits of xmm reg are used for count).
21413 instruct vshiftcnt(vec dst, rRegI cnt) %{
21414   match(Set dst (LShiftCntV cnt));
21415   match(Set dst (RShiftCntV cnt));
21416   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21417   ins_encode %{
21418     __ movdl($dst$$XMMRegister, $cnt$$Register);
21419   %}
21420   ins_pipe( pipe_slow );
21421 %}
21422 
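// x86 has no byte-granularity vector shifts (there is no psllb/psrlb/psrab),
// so the byte-shift rules below widen each byte lane to 16 bits (sign- or
// zero-extending as the opcode requires), shift at word width, mask off the
// bits that spilled across lane boundaries, and pack back down. Scalar
// sketch of the logical left case (illustrative):
//
//   uint8_t shl_byte(uint8_t x, int s) {
//     uint16_t w = x;          // vextendbw
//     w = (uint16_t)(w << s);  // vshiftw
//     return (uint8_t)w;       // mask with 0x00FF + packuswb
//   }
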
21423 // Byte vector shift
21424 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21425   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21426   match(Set dst ( LShiftVB src shift));
21427   match(Set dst ( RShiftVB src shift));
21428   match(Set dst (URShiftVB src shift));
21429   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21430   format %{"vector_byte_shift $dst,$src,$shift" %}
21431   ins_encode %{
21432     assert(UseSSE > 3, "required");
21433     int opcode = this->ideal_Opcode();
21434     bool sign = (opcode != Op_URShiftVB);
21435     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21436     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21437     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21438     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21439     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21440   %}
21441   ins_pipe( pipe_slow );
21442 %}
21443 
21444 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21445   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21446             UseAVX <= 1);
21447   match(Set dst ( LShiftVB src shift));
21448   match(Set dst ( RShiftVB src shift));
21449   match(Set dst (URShiftVB src shift));
21450   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21451   format %{"vector_byte_shift $dst,$src,$shift" %}
21452   ins_encode %{
21453     assert(UseSSE > 3, "required");
21454     int opcode = this->ideal_Opcode();
21455     bool sign = (opcode != Op_URShiftVB);
21456     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21457     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21458     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21459     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21460     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21461     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21462     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21463     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21464     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21465   %}
21466   ins_pipe( pipe_slow );
21467 %}
21468 
21469 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21470   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21471             UseAVX > 1);
21472   match(Set dst ( LShiftVB src shift));
21473   match(Set dst ( RShiftVB src shift));
21474   match(Set dst (URShiftVB src shift));
21475   effect(TEMP dst, TEMP tmp);
21476   format %{"vector_byte_shift $dst,$src,$shift" %}
21477   ins_encode %{
21478     int opcode = this->ideal_Opcode();
21479     bool sign = (opcode != Op_URShiftVB);
21480     int vlen_enc = Assembler::AVX_256bit;
21481     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21482     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21483     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21484     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21485     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21486   %}
21487   ins_pipe( pipe_slow );
21488 %}
21489 
21490 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21491   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21492   match(Set dst ( LShiftVB src shift));
21493   match(Set dst ( RShiftVB src shift));
21494   match(Set dst (URShiftVB src shift));
21495   effect(TEMP dst, TEMP tmp);
21496   format %{"vector_byte_shift $dst,$src,$shift" %}
21497   ins_encode %{
21498     assert(UseAVX > 1, "required");
21499     int opcode = this->ideal_Opcode();
21500     bool sign = (opcode != Op_URShiftVB);
21501     int vlen_enc = Assembler::AVX_256bit;
21502     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21503     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21504     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21505     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21506     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21507     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21508     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21509     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21510     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21511   %}
21512   ins_pipe( pipe_slow );
21513 %}
21514 
21515 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21516   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21517   match(Set dst ( LShiftVB src shift));
21518   match(Set dst  (RShiftVB src shift));
21519   match(Set dst (URShiftVB src shift));
21520   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21521   format %{"vector_byte_shift $dst,$src,$shift" %}
21522   ins_encode %{
21523     assert(UseAVX > 2, "required");
21524     int opcode = this->ideal_Opcode();
21525     bool sign = (opcode != Op_URShiftVB);
21526     int vlen_enc = Assembler::AVX_512bit;
21527     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21528     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21529     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21530     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21531     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21532     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21533     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21534     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21535     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21536     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21537     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21538     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21539   %}
21540   ins_pipe( pipe_slow );
21541 %}
21542 
// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int
// with sign extension before shifting. But char vectors are fine, since
// chars are unsigned values.
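// For example (values illustrative):
//   short s = -1;        // 0xFFFF
//   (short)(s >>> 1)     // == -1: promote to 0xFFFFFFFF, shift to
//                        //    0x7FFFFFFF, narrow back to 0xFFFF,
//                        //    while a 16-bit lane shift gives 0x7FFF (32767)
//   char c = '\uFFFF';
//   (char)(c >>> 1)      // == 0x7FFF: zero-extending promotion, so the
//                        //    16-bit lane shift matches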
// Shorts/Chars vector shift
21548 instruct vshiftS(vec dst, vec src, vec shift) %{
21549   predicate(!n->as_ShiftV()->is_var_shift());
21550   match(Set dst ( LShiftVS src shift));
21551   match(Set dst ( RShiftVS src shift));
21552   match(Set dst (URShiftVS src shift));
21553   effect(TEMP dst, USE src, USE shift);
21554   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21555   ins_encode %{
21556     int opcode = this->ideal_Opcode();
21557     if (UseAVX > 0) {
21558       int vlen_enc = vector_length_encoding(this);
21559       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21560     } else {
21561       int vlen = Matcher::vector_length(this);
21562       if (vlen == 2) {
21563         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21564         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21565       } else if (vlen == 4) {
21566         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21567         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21568       } else {
21569         assert (vlen == 8, "sanity");
21570         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21571         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21572       }
21573     }
21574   %}
21575   ins_pipe( pipe_slow );
21576 %}
21577 
// Integers vector shift
21579 instruct vshiftI(vec dst, vec src, vec shift) %{
21580   predicate(!n->as_ShiftV()->is_var_shift());
21581   match(Set dst ( LShiftVI src shift));
21582   match(Set dst ( RShiftVI src shift));
21583   match(Set dst (URShiftVI src shift));
21584   effect(TEMP dst, USE src, USE shift);
21585   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21586   ins_encode %{
21587     int opcode = this->ideal_Opcode();
21588     if (UseAVX > 0) {
21589       int vlen_enc = vector_length_encoding(this);
21590       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21591     } else {
21592       int vlen = Matcher::vector_length(this);
21593       if (vlen == 2) {
21594         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21595         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21596       } else {
21597         assert(vlen == 4, "sanity");
21598         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21599         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21600       }
21601     }
21602   %}
21603   ins_pipe( pipe_slow );
21604 %}
21605 
// Integers vector constant shift
21607 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21608   match(Set dst (LShiftVI src (LShiftCntV shift)));
21609   match(Set dst (RShiftVI src (RShiftCntV shift)));
21610   match(Set dst (URShiftVI src (RShiftCntV shift)));
21611   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21612   ins_encode %{
21613     int opcode = this->ideal_Opcode();
21614     if (UseAVX > 0) {
21615       int vector_len = vector_length_encoding(this);
21616       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21617     } else {
21618       int vlen = Matcher::vector_length(this);
21619       if (vlen == 2) {
21620         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21621         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21622       } else {
21623         assert(vlen == 4, "sanity");
21624         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21625         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21626       }
21627     }
21628   %}
21629   ins_pipe( pipe_slow );
21630 %}
21631 
21632 // Longs vector shift
21633 instruct vshiftL(vec dst, vec src, vec shift) %{
21634   predicate(!n->as_ShiftV()->is_var_shift());
21635   match(Set dst ( LShiftVL src shift));
21636   match(Set dst (URShiftVL src shift));
21637   effect(TEMP dst, USE src, USE shift);
21638   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21639   ins_encode %{
21640     int opcode = this->ideal_Opcode();
21641     if (UseAVX > 0) {
21642       int vlen_enc = vector_length_encoding(this);
21643       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21644     } else {
21645       assert(Matcher::vector_length(this) == 2, "");
21646       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21647       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21648     }
21649   %}
21650   ins_pipe( pipe_slow );
21651 %}
21652 
21653 // Longs vector constant shift
21654 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21655   match(Set dst (LShiftVL src (LShiftCntV shift)));
21656   match(Set dst (URShiftVL src (RShiftCntV shift)));
21657   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21658   ins_encode %{
21659     int opcode = this->ideal_Opcode();
21660     if (UseAVX > 0) {
21661       int vector_len = vector_length_encoding(this);
21662       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21663     } else {
21664       assert(Matcher::vector_length(this) == 2, "");
21665       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21666       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21667     }
21668   %}
21669   ins_pipe( pipe_slow );
21670 %}
21671 
21672 // -------------------ArithmeticRightShift -----------------------------------
21673 // Long vector arithmetic right shift
21674 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21675   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21676   match(Set dst (RShiftVL src shift));
21677   effect(TEMP dst, TEMP tmp);
21678   format %{ "vshiftq $dst,$src,$shift" %}
21679   ins_encode %{
21680     uint vlen = Matcher::vector_length(this);
21681     if (vlen == 2) {
21682       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21683       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21684       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21685       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21686       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21687       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21688     } else {
21689       assert(vlen == 4, "sanity");
21690       assert(UseAVX > 1, "required");
21691       int vlen_enc = Assembler::AVX_256bit;
21692       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21693       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21694       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21695       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21696       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21697     }
21698   %}
21699   ins_pipe( pipe_slow );
21700 %}
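
// Before AVX-512 (evpsraq below) x86 has no packed 64-bit arithmetic right
// shift, so the rule above derives it from a logical shift plus a sign-mask
// fixup, with the in-memory mask holding the sign bit of each lane. Scalar
// form of the identity (illustrative):
//
//   int64_t sra64(int64_t x, int s) {
//     uint64_t m = 0x8000000000000000ULL >> s;  // psrlq on the sign mask
//     uint64_t r = (uint64_t)x >> s;            // psrlq on the value
//     return (int64_t)((r ^ m) - m);            // pxor + psubq sign-extend
//   }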
21701 
21702 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21703   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21704   match(Set dst (RShiftVL src shift));
21705   format %{ "vshiftq $dst,$src,$shift" %}
21706   ins_encode %{
21707     int vlen_enc = vector_length_encoding(this);
21708     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21709   %}
21710   ins_pipe( pipe_slow );
21711 %}
21712 
21713 // ------------------- Variable Shift -----------------------------
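// Per-lane (variable) shift counts map to the AVX2 vpsllv*/vpsrlv*/vpsrav*
// family, which exists only at dword/qword granularity (word granularity
// arrives with AVX-512BW, and vpsravq with AVX-512). Byte and short variable
// shifts below are therefore synthesized by widening to 32-bit lanes,
// shifting, and narrowing back.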
21714 // Byte variable shift
21715 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21716   predicate(Matcher::vector_length(n) <= 8 &&
21717             n->as_ShiftV()->is_var_shift() &&
21718             !VM_Version::supports_avx512bw());
21719   match(Set dst ( LShiftVB src shift));
21720   match(Set dst ( RShiftVB src shift));
21721   match(Set dst (URShiftVB src shift));
21722   effect(TEMP dst, TEMP vtmp);
21723   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21724   ins_encode %{
21725     assert(UseAVX >= 2, "required");
21726 
21727     int opcode = this->ideal_Opcode();
21728     int vlen_enc = Assembler::AVX_128bit;
21729     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21730     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21731   %}
21732   ins_pipe( pipe_slow );
21733 %}
21734 
21735 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21736   predicate(Matcher::vector_length(n) == 16 &&
21737             n->as_ShiftV()->is_var_shift() &&
21738             !VM_Version::supports_avx512bw());
21739   match(Set dst ( LShiftVB src shift));
21740   match(Set dst ( RShiftVB src shift));
21741   match(Set dst (URShiftVB src shift));
21742   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21743   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21744   ins_encode %{
21745     assert(UseAVX >= 2, "required");
21746 
21747     int opcode = this->ideal_Opcode();
21748     int vlen_enc = Assembler::AVX_128bit;
21749     // Shift lower half and get word result in dst
21750     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21751 
21752     // Shift upper half and get word result in vtmp1
21753     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21754     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21755     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21756 
21757     // Merge and down convert the two word results to byte in dst
21758     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21759   %}
21760   ins_pipe( pipe_slow );
21761 %}
21762 
21763 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21764   predicate(Matcher::vector_length(n) == 32 &&
21765             n->as_ShiftV()->is_var_shift() &&
21766             !VM_Version::supports_avx512bw());
21767   match(Set dst ( LShiftVB src shift));
21768   match(Set dst ( RShiftVB src shift));
21769   match(Set dst (URShiftVB src shift));
21770   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21772   ins_encode %{
21773     assert(UseAVX >= 2, "required");
21774 
21775     int opcode = this->ideal_Opcode();
21776     int vlen_enc = Assembler::AVX_128bit;
21777     // Process lower 128 bits and get result in dst
21778     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21779     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21780     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21781     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21782     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21783 
21784     // Process higher 128 bits and get result in vtmp3
21785     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21786     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21787     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21788     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21789     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21790     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21791     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21792 
21793     // Merge the two results in dst
21794     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21795   %}
21796   ins_pipe( pipe_slow );
21797 %}
21798 
21799 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21800   predicate(Matcher::vector_length(n) <= 32 &&
21801             n->as_ShiftV()->is_var_shift() &&
21802             VM_Version::supports_avx512bw());
21803   match(Set dst ( LShiftVB src shift));
21804   match(Set dst ( RShiftVB src shift));
21805   match(Set dst (URShiftVB src shift));
21806   effect(TEMP dst, TEMP vtmp);
21807   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21808   ins_encode %{
21809     assert(UseAVX > 2, "required");
21810 
21811     int opcode = this->ideal_Opcode();
21812     int vlen_enc = vector_length_encoding(this);
21813     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21814   %}
21815   ins_pipe( pipe_slow );
21816 %}
21817 
21818 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21819   predicate(Matcher::vector_length(n) == 64 &&
21820             n->as_ShiftV()->is_var_shift() &&
21821             VM_Version::supports_avx512bw());
21822   match(Set dst ( LShiftVB src shift));
21823   match(Set dst ( RShiftVB src shift));
21824   match(Set dst (URShiftVB src shift));
21825   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21826   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21827   ins_encode %{
21828     assert(UseAVX > 2, "required");
21829 
21830     int opcode = this->ideal_Opcode();
21831     int vlen_enc = Assembler::AVX_256bit;
21832     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21833     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21834     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21835     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21836     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21837   %}
21838   ins_pipe( pipe_slow );
21839 %}
21840 
21841 // Short variable shift
21842 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21843   predicate(Matcher::vector_length(n) <= 8 &&
21844             n->as_ShiftV()->is_var_shift() &&
21845             !VM_Version::supports_avx512bw());
21846   match(Set dst ( LShiftVS src shift));
21847   match(Set dst ( RShiftVS src shift));
21848   match(Set dst (URShiftVS src shift));
21849   effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21851   ins_encode %{
21852     assert(UseAVX >= 2, "required");
21853 
21854     int opcode = this->ideal_Opcode();
21855     bool sign = (opcode != Op_URShiftVS);
21856     int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21859     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21860     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21861     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21862     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21863   %}
21864   ins_pipe( pipe_slow );
21865 %}
21866 
21867 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21868   predicate(Matcher::vector_length(n) == 16 &&
21869             n->as_ShiftV()->is_var_shift() &&
21870             !VM_Version::supports_avx512bw());
21871   match(Set dst ( LShiftVS src shift));
21872   match(Set dst ( RShiftVS src shift));
21873   match(Set dst (URShiftVS src shift));
21874   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21876   ins_encode %{
21877     assert(UseAVX >= 2, "required");
21878 
21879     int opcode = this->ideal_Opcode();
21880     bool sign = (opcode != Op_URShiftVS);
21881     int vlen_enc = Assembler::AVX_256bit;
21882     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21883     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21884     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21885     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21886     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21887 
21888     // Shift upper half, with result in dst using vtmp1 as TEMP
21889     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21890     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21891     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21892     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21893     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21894     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21895 
21896     // Merge lower and upper half result into dst
21897     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21898     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21899   %}
21900   ins_pipe( pipe_slow );
21901 %}
21902 
21903 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21904   predicate(n->as_ShiftV()->is_var_shift() &&
21905             VM_Version::supports_avx512bw());
21906   match(Set dst ( LShiftVS src shift));
21907   match(Set dst ( RShiftVS src shift));
21908   match(Set dst (URShiftVS src shift));
21909   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21910   ins_encode %{
21911     assert(UseAVX > 2, "required");
21912 
21913     int opcode = this->ideal_Opcode();
21914     int vlen_enc = vector_length_encoding(this);
21915     if (!VM_Version::supports_avx512vl()) {
21916       vlen_enc = Assembler::AVX_512bit;
21917     }
21918     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21919   %}
21920   ins_pipe( pipe_slow );
21921 %}
21922 
// Integer variable shift
21924 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21925   predicate(n->as_ShiftV()->is_var_shift());
21926   match(Set dst ( LShiftVI src shift));
21927   match(Set dst ( RShiftVI src shift));
21928   match(Set dst (URShiftVI src shift));
21929   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21930   ins_encode %{
21931     assert(UseAVX >= 2, "required");
21932 
21933     int opcode = this->ideal_Opcode();
21934     int vlen_enc = vector_length_encoding(this);
21935     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21936   %}
21937   ins_pipe( pipe_slow );
21938 %}
21939 
// Long variable shift
21941 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21942   predicate(n->as_ShiftV()->is_var_shift());
21943   match(Set dst ( LShiftVL src shift));
21944   match(Set dst (URShiftVL src shift));
21945   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21946   ins_encode %{
21947     assert(UseAVX >= 2, "required");
21948 
21949     int opcode = this->ideal_Opcode();
21950     int vlen_enc = vector_length_encoding(this);
21951     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21952   %}
21953   ins_pipe( pipe_slow );
21954 %}
21955 
// Long variable arithmetic right shift
21957 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21958   predicate(Matcher::vector_length(n) <= 4 &&
21959             n->as_ShiftV()->is_var_shift() &&
21960             UseAVX == 2);
21961   match(Set dst (RShiftVL src shift));
21962   effect(TEMP dst, TEMP vtmp);
21963   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21964   ins_encode %{
21965     int opcode = this->ideal_Opcode();
21966     int vlen_enc = vector_length_encoding(this);
21967     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21968                  $vtmp$$XMMRegister);
21969   %}
21970   ins_pipe( pipe_slow );
21971 %}
21972 
21973 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21974   predicate(n->as_ShiftV()->is_var_shift() &&
21975             UseAVX > 2);
21976   match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21978   ins_encode %{
21979     int opcode = this->ideal_Opcode();
21980     int vlen_enc = vector_length_encoding(this);
21981     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21982   %}
21983   ins_pipe( pipe_slow );
21984 %}
21985 
21986 // --------------------------------- AND --------------------------------------
21987 
21988 instruct vand(vec dst, vec src) %{
21989   predicate(UseAVX == 0);
21990   match(Set dst (AndV dst src));
21991   format %{ "pand    $dst,$src\t! and vectors" %}
21992   ins_encode %{
21993     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21994   %}
21995   ins_pipe( pipe_slow );
21996 %}
21997 
21998 instruct vand_reg(vec dst, vec src1, vec src2) %{
21999   predicate(UseAVX > 0);
22000   match(Set dst (AndV src1 src2));
22001   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
22002   ins_encode %{
22003     int vlen_enc = vector_length_encoding(this);
22004     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22005   %}
22006   ins_pipe( pipe_slow );
22007 %}
22008 
22009 instruct vand_mem(vec dst, vec src, memory mem) %{
22010   predicate((UseAVX > 0) &&
22011             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22012   match(Set dst (AndV src (LoadVector mem)));
22013   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
22014   ins_encode %{
22015     int vlen_enc = vector_length_encoding(this);
22016     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22017   %}
22018   ins_pipe( pipe_slow );
22019 %}
22020 
22021 // --------------------------------- OR ---------------------------------------
22022 
22023 instruct vor(vec dst, vec src) %{
22024   predicate(UseAVX == 0);
22025   match(Set dst (OrV dst src));
22026   format %{ "por     $dst,$src\t! or vectors" %}
22027   ins_encode %{
22028     __ por($dst$$XMMRegister, $src$$XMMRegister);
22029   %}
22030   ins_pipe( pipe_slow );
22031 %}
22032 
22033 instruct vor_reg(vec dst, vec src1, vec src2) %{
22034   predicate(UseAVX > 0);
22035   match(Set dst (OrV src1 src2));
22036   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
22037   ins_encode %{
22038     int vlen_enc = vector_length_encoding(this);
22039     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22040   %}
22041   ins_pipe( pipe_slow );
22042 %}
22043 
22044 instruct vor_mem(vec dst, vec src, memory mem) %{
22045   predicate((UseAVX > 0) &&
22046             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22047   match(Set dst (OrV src (LoadVector mem)));
22048   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
22049   ins_encode %{
22050     int vlen_enc = vector_length_encoding(this);
22051     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22052   %}
22053   ins_pipe( pipe_slow );
22054 %}
22055 
22056 // --------------------------------- XOR --------------------------------------
22057 
22058 instruct vxor(vec dst, vec src) %{
22059   predicate(UseAVX == 0);
22060   match(Set dst (XorV dst src));
22061   format %{ "pxor    $dst,$src\t! xor vectors" %}
22062   ins_encode %{
22063     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22064   %}
22065   ins_pipe( pipe_slow );
22066 %}
22067 
22068 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22069   predicate(UseAVX > 0);
22070   match(Set dst (XorV src1 src2));
22071   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
22072   ins_encode %{
22073     int vlen_enc = vector_length_encoding(this);
22074     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22075   %}
22076   ins_pipe( pipe_slow );
22077 %}
22078 
22079 instruct vxor_mem(vec dst, vec src, memory mem) %{
22080   predicate((UseAVX > 0) &&
22081             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22082   match(Set dst (XorV src (LoadVector mem)));
22083   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
22084   ins_encode %{
22085     int vlen_enc = vector_length_encoding(this);
22086     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22087   %}
22088   ins_pipe( pipe_slow );
22089 %}
22090 
22091 // --------------------------------- VectorCast --------------------------------------
22092 
22093 instruct vcastBtoX(vec dst, vec src) %{
22094   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22095   match(Set dst (VectorCastB2X src));
22096   format %{ "vector_cast_b2x $dst,$src\t!" %}
22097   ins_encode %{
22098     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22099     int vlen_enc = vector_length_encoding(this);
22100     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22101   %}
22102   ins_pipe( pipe_slow );
22103 %}
22104 
22105 instruct vcastBtoD(legVec dst, legVec src) %{
22106   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22107   match(Set dst (VectorCastB2X src));
22108   format %{ "vector_cast_b2x $dst,$src\t!" %}
22109   ins_encode %{
22110     int vlen_enc = vector_length_encoding(this);
22111     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22112   %}
22113   ins_pipe( pipe_slow );
22114 %}
22115 
22116 instruct castStoX(vec dst, vec src) %{
22117   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22118             Matcher::vector_length(n->in(1)) <= 8 && // src
22119             Matcher::vector_element_basic_type(n) == T_BYTE);
22120   match(Set dst (VectorCastS2X src));
22121   format %{ "vector_cast_s2x $dst,$src" %}
22122   ins_encode %{
22123     assert(UseAVX > 0, "required");
22124 
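    // Zero the high byte of each short so the unsigned saturating pack below
    // degenerates to a plain truncation to byte.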
22125     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22126     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22127   %}
22128   ins_pipe( pipe_slow );
22129 %}
22130 
22131 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22132   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22133             Matcher::vector_length(n->in(1)) == 16 && // src
22134             Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  effect(TEMP dst, TEMP vtmp);
22137   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22138   ins_encode %{
22139     assert(UseAVX > 0, "required");
22140 
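    // Same truncation-via-pack trick as in castStoX above, but the source spans
    // a 256-bit register: pull the upper 128-bit lane down and pack both halves.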
22141     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22142     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22143     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22144     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22145   %}
22146   ins_pipe( pipe_slow );
22147 %}
22148 
22149 instruct vcastStoX_evex(vec dst, vec src) %{
22150   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22151             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22152   match(Set dst (VectorCastS2X src));
22153   format %{ "vector_cast_s2x $dst,$src\t!" %}
22154   ins_encode %{
22155     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22156     int src_vlen_enc = vector_length_encoding(this, $src);
22157     int vlen_enc = vector_length_encoding(this);
22158     switch (to_elem_bt) {
22159       case T_BYTE:
22160         if (!VM_Version::supports_avx512vl()) {
22161           vlen_enc = Assembler::AVX_512bit;
22162         }
22163         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22164         break;
22165       case T_INT:
22166         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22167         break;
22168       case T_FLOAT:
22169         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22170         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22171         break;
22172       case T_LONG:
22173         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22174         break;
22175       case T_DOUBLE: {
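        // Short -> double widens each element 4x, so sign-extend to int at half
        // the destination width, then convert int -> double at full width.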
22176         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22177         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22178         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22179         break;
22180       }
22181       default:
22182         ShouldNotReachHere();
22183     }
22184   %}
22185   ins_pipe( pipe_slow );
22186 %}
22187 
22188 instruct castItoX(vec dst, vec src) %{
22189   predicate(UseAVX <= 2 &&
22190             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22191             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22192   match(Set dst (VectorCastI2X src));
22193   format %{ "vector_cast_i2x $dst,$src" %}
22194   ins_encode %{
22195     assert(UseAVX > 0, "required");
22196 
22197     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22198     int vlen_enc = vector_length_encoding(this, $src);
22199 
22200     if (to_elem_bt == T_BYTE) {
22201       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22202       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22203       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22204     } else {
22205       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22206       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22207       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22208     }
22209   %}
22210   ins_pipe( pipe_slow );
22211 %}
22212 
22213 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22214   predicate(UseAVX <= 2 &&
22215             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22216             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22217   match(Set dst (VectorCastI2X src));
22218   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22219   effect(TEMP dst, TEMP vtmp);
22220   ins_encode %{
22221     assert(UseAVX > 0, "required");
22222 
22223     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22224     int vlen_enc = vector_length_encoding(this, $src);
22225 
22226     if (to_elem_bt == T_BYTE) {
22227       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22228       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22229       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22230       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22231     } else {
22232       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22233       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22234       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22235       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22236     }
22237   %}
22238   ins_pipe( pipe_slow );
22239 %}
22240 
22241 instruct vcastItoX_evex(vec dst, vec src) %{
22242   predicate(UseAVX > 2 ||
22243             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22244   match(Set dst (VectorCastI2X src));
22245   format %{ "vector_cast_i2x $dst,$src\t!" %}
22246   ins_encode %{
22247     assert(UseAVX > 0, "required");
22248 
22249     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22250     int src_vlen_enc = vector_length_encoding(this, $src);
22251     int dst_vlen_enc = vector_length_encoding(this);
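    // The evpmovdb/evpmovdw down-converting moves below need AVX512VL for
    // sub-512-bit vectors; without it, fall back to the 512-bit encoding.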
22252     switch (dst_elem_bt) {
22253       case T_BYTE:
22254         if (!VM_Version::supports_avx512vl()) {
22255           src_vlen_enc = Assembler::AVX_512bit;
22256         }
22257         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22258         break;
22259       case T_SHORT:
22260         if (!VM_Version::supports_avx512vl()) {
22261           src_vlen_enc = Assembler::AVX_512bit;
22262         }
22263         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22264         break;
22265       case T_FLOAT:
22266         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22267         break;
22268       case T_LONG:
22269         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22270         break;
22271       case T_DOUBLE:
22272         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22273         break;
22274       default:
22275         ShouldNotReachHere();
22276     }
22277   %}
22278   ins_pipe( pipe_slow );
22279 %}
22280 
22281 instruct vcastLtoBS(vec dst, vec src) %{
22282   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22283             UseAVX <= 2);
22284   match(Set dst (VectorCastL2X src));
22285   format %{ "vector_cast_l2x  $dst,$src" %}
22286   ins_encode %{
22287     assert(UseAVX > 0, "required");
22288 
22289     int vlen = Matcher::vector_length_in_bytes(this, $src);
22290     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22291     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22292                                                       : ExternalAddress(vector_int_to_short_mask());
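    // Gather the low 32 bits of each long into the bottom lanes, then mask and
    // pack down to shorts (and once more to bytes when casting to T_BYTE).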
22293     if (vlen <= 16) {
22294       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22295       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22296       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22297     } else {
22298       assert(vlen <= 32, "required");
22299       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22300       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22301       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22302       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22303     }
22304     if (to_elem_bt == T_BYTE) {
22305       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22306     }
22307   %}
22308   ins_pipe( pipe_slow );
22309 %}
22310 
22311 instruct vcastLtoX_evex(vec dst, vec src) %{
22312   predicate(UseAVX > 2 ||
22313             (Matcher::vector_element_basic_type(n) == T_INT ||
22314              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22315              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22316   match(Set dst (VectorCastL2X src));
22317   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22318   ins_encode %{
22319     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22320     int vlen = Matcher::vector_length_in_bytes(this, $src);
22321     int vlen_enc = vector_length_encoding(this, $src);
22322     switch (to_elem_bt) {
22323       case T_BYTE:
22324         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22325           vlen_enc = Assembler::AVX_512bit;
22326         }
22327         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22328         break;
22329       case T_SHORT:
22330         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22331           vlen_enc = Assembler::AVX_512bit;
22332         }
22333         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22334         break;
22335       case T_INT:
22336         if (vlen == 8) {
22337           if ($dst$$XMMRegister != $src$$XMMRegister) {
22338             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22339           }
22340         } else if (vlen == 16) {
22341           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22342         } else if (vlen == 32) {
22343           if (UseAVX > 2) {
22344             if (!VM_Version::supports_avx512vl()) {
22345               vlen_enc = Assembler::AVX_512bit;
22346             }
22347             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22348           } else {
22349             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22350             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22351           }
22352         } else { // vlen == 64
22353           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22354         }
22355         break;
22356       case T_FLOAT:
22357         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22358         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22359         break;
22360       case T_DOUBLE:
22361         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22362         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22363         break;
      default: assert(false, "%s", type2name(to_elem_bt));
22366     }
22367   %}
22368   ins_pipe( pipe_slow );
22369 %}
22370 
22371 instruct vcastFtoD_reg(vec dst, vec src) %{
22372   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22373   match(Set dst (VectorCastF2X src));
22374   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22375   ins_encode %{
22376     int vlen_enc = vector_length_encoding(this);
22377     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22378   %}
22379   ins_pipe( pipe_slow );
22380 %}
22381 
instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22384   predicate(!VM_Version::supports_avx10_2() &&
22385             !VM_Version::supports_avx512vl() &&
22386             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22387             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22388             is_integral_type(Matcher::vector_element_basic_type(n)));
22389   match(Set dst (VectorCastF2X src));
22390   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22391   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22392   ins_encode %{
22393     int vlen_enc = vector_length_encoding(this, $src);
22394     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading
    // addresses wider than 32 bits for register-indirect addressing: stub constants
    // live in the code cache, and ReservedCodeCacheSize is currently capped at 2G.
    // Targets are free to raise that limit, but a code cache larger than 2G is
    // unrealistic in practice, and keeping the cap saves a temporary register
    // allocation, which in the limiting case can prevent spilling in blocks with
    // high register pressure.
22402     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22403                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22404                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22405   %}
22406   ins_pipe( pipe_slow );
22407 %}
22408 
22409 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22410   predicate(!VM_Version::supports_avx10_2() &&
22411             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22412             is_integral_type(Matcher::vector_element_basic_type(n)));
22413   match(Set dst (VectorCastF2X src));
22414   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22415   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22416   ins_encode %{
22417     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22418     if (to_elem_bt == T_LONG) {
22419       int vlen_enc = vector_length_encoding(this);
22420       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22421                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22422                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22423     } else {
22424       int vlen_enc = vector_length_encoding(this, $src);
22425       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22426                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22427                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22428     }
22429   %}
22430   ins_pipe( pipe_slow );
22431 %}
22432 
22433 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22434   predicate(VM_Version::supports_avx10_2() &&
22435             is_integral_type(Matcher::vector_element_basic_type(n)));
22436   match(Set dst (VectorCastF2X src));
22437   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22438   ins_encode %{
22439     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22440     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22441     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22442   %}
22443   ins_pipe( pipe_slow );
22444 %}
22445 
22446 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22447   predicate(VM_Version::supports_avx10_2() &&
22448             is_integral_type(Matcher::vector_element_basic_type(n)));
22449   match(Set dst (VectorCastF2X (LoadVector src)));
22450   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22451   ins_encode %{
22452     int vlen = Matcher::vector_length(this);
22453     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22454     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22455     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22456   %}
22457   ins_pipe( pipe_slow );
22458 %}
22459 
22460 instruct vcastDtoF_reg(vec dst, vec src) %{
22461   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22462   match(Set dst (VectorCastD2X src));
22463   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22464   ins_encode %{
22465     int vlen_enc = vector_length_encoding(this, $src);
22466     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22467   %}
22468   ins_pipe( pipe_slow );
22469 %}
22470 
22471 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22472   predicate(!VM_Version::supports_avx10_2() &&
22473             !VM_Version::supports_avx512vl() &&
22474             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22475             is_integral_type(Matcher::vector_element_basic_type(n)));
22476   match(Set dst (VectorCastD2X src));
22477   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22478   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22479   ins_encode %{
22480     int vlen_enc = vector_length_encoding(this, $src);
22481     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22482     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22483                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22484                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22485   %}
22486   ins_pipe( pipe_slow );
22487 %}
22488 
22489 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22490   predicate(!VM_Version::supports_avx10_2() &&
22491             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22492             is_integral_type(Matcher::vector_element_basic_type(n)));
22493   match(Set dst (VectorCastD2X src));
22494   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22495   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22496   ins_encode %{
22497     int vlen_enc = vector_length_encoding(this, $src);
22498     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22499     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22500                               ExternalAddress(vector_float_signflip());
22501     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22502                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22503   %}
22504   ins_pipe( pipe_slow );
22505 %}
22506 
22507 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22508   predicate(VM_Version::supports_avx10_2() &&
22509             is_integral_type(Matcher::vector_element_basic_type(n)));
22510   match(Set dst (VectorCastD2X src));
22511   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22512   ins_encode %{
22513     int vlen_enc = vector_length_encoding(this, $src);
22514     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22515     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22516   %}
22517   ins_pipe( pipe_slow );
22518 %}
22519 
22520 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22521   predicate(VM_Version::supports_avx10_2() &&
22522             is_integral_type(Matcher::vector_element_basic_type(n)));
22523   match(Set dst (VectorCastD2X (LoadVector src)));
22524   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22525   ins_encode %{
22526     int vlen = Matcher::vector_length(this);
22527     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22528     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22529     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22530   %}
22531   ins_pipe( pipe_slow );
22532 %}
22533 
22534 instruct vucast(vec dst, vec src) %{
22535   match(Set dst (VectorUCastB2X src));
22536   match(Set dst (VectorUCastS2X src));
22537   match(Set dst (VectorUCastI2X src));
22538   format %{ "vector_ucast $dst,$src\t!" %}
22539   ins_encode %{
22540     assert(UseAVX > 0, "required");
22541 
22542     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22543     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22544     int vlen_enc = vector_length_encoding(this);
22545     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22546   %}
22547   ins_pipe( pipe_slow );
22548 %}
22549 
22550 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22551   predicate(!VM_Version::supports_avx512vl() &&
22552             Matcher::vector_length_in_bytes(n) < 64 &&
22553             Matcher::vector_element_basic_type(n) == T_INT);
22554   match(Set dst (RoundVF src));
22555   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22556   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22557   ins_encode %{
22558     int vlen_enc = vector_length_encoding(this);
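    // 0x3F80 sets MXCSR to round-toward-negative-infinity with all exceptions
    // masked, consistent with computing Math.round as floor(x + 0.5); the
    // 0x3FBF E-core variant additionally pre-sets the sticky exception flags.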
22559     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22560     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22561                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22562                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22563   %}
22564   ins_pipe( pipe_slow );
22565 %}
22566 
22567 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22568   predicate((VM_Version::supports_avx512vl() ||
22569              Matcher::vector_length_in_bytes(n) == 64) &&
22570              Matcher::vector_element_basic_type(n) == T_INT);
22571   match(Set dst (RoundVF src));
22572   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22573   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22574   ins_encode %{
22575     int vlen_enc = vector_length_encoding(this);
22576     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22577     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22578                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22579                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22580   %}
22581   ins_pipe( pipe_slow );
22582 %}
22583 
22584 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22585   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22586   match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22588   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22589   ins_encode %{
22590     int vlen_enc = vector_length_encoding(this);
22591     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22592     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22593                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22594                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22595   %}
22596   ins_pipe( pipe_slow );
22597 %}
22598 
22599 // --------------------------------- VectorMaskCmp --------------------------------------
22600 
22601 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22602   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22603             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22604             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22605             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22606   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22607   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22608   ins_encode %{
22609     int vlen_enc = vector_length_encoding(this, $src1);
22610     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22611     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22612       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22613     } else {
22614       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22615     }
22616   %}
22617   ins_pipe( pipe_slow );
22618 %}
22619 
22620 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22621   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22622             n->bottom_type()->isa_vectmask() == nullptr &&
22623             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22624   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22625   effect(TEMP ktmp);
22626   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22627   ins_encode %{
22628     int vlen_enc = Assembler::AVX_512bit;
22629     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22630     KRegister mask = k0; // The comparison itself is not being masked.
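    // Compare into a temporary mask register, then expand it back into a vector
    // by a masked load of all-ones; merge == false zeroes the unselected lanes.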
22631     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22632       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22633       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22634     } else {
22635       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22636       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22637     }
22638   %}
22639   ins_pipe( pipe_slow );
22640 %}
22641 
22642 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22643   predicate(n->bottom_type()->isa_vectmask() &&
22644             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22645   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22646   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22647   ins_encode %{
22648     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22649     int vlen_enc = vector_length_encoding(this, $src1);
22650     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22651     KRegister mask = k0; // The comparison itself is not being masked.
22652     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22653       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22654     } else {
22655       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22656     }
22657   %}
22658   ins_pipe( pipe_slow );
22659 %}
22660 
22661 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22662   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22663             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22664             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22665             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22666             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22667             (n->in(2)->get_int() == BoolTest::eq ||
22668              n->in(2)->get_int() == BoolTest::lt ||
22669              n->in(2)->get_int() == BoolTest::gt)); // cond
22670   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22671   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22672   ins_encode %{
22673     int vlen_enc = vector_length_encoding(this, $src1);
22674     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22675     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22676     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22677   %}
22678   ins_pipe( pipe_slow );
22679 %}
22680 
22681 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22682   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22683             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22684             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22685             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22686             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22687             (n->in(2)->get_int() == BoolTest::ne ||
22688              n->in(2)->get_int() == BoolTest::le ||
22689              n->in(2)->get_int() == BoolTest::ge)); // cond
22690   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22691   effect(TEMP dst, TEMP xtmp);
22692   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22693   ins_encode %{
22694     int vlen_enc = vector_length_encoding(this, $src1);
22695     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22696     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22697     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22698   %}
22699   ins_pipe( pipe_slow );
22700 %}
22701 
22702 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22703   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22704             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22705             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22706             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22707             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22708   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22709   effect(TEMP dst, TEMP xtmp);
22710   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22711   ins_encode %{
22712     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22713     int vlen_enc = vector_length_encoding(this, $src1);
22714     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22715     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22716 
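    // Unsigned compare via sign-bit flip: XOR-ing both operands with the
    // per-element sign bit preserves unsigned order under a signed compare.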
22717     if (vlen_enc == Assembler::AVX_128bit) {
22718       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22719     } else {
22720       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22721     }
22722     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22723     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22724     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22725   %}
22726   ins_pipe( pipe_slow );
22727 %}
22728 
22729 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22730   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22731              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22732              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22733   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22734   effect(TEMP ktmp);
22735   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22736   ins_encode %{
22737     assert(UseAVX > 2, "required");
22738 
22739     int vlen_enc = vector_length_encoding(this, $src1);
22740     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22741     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22742     KRegister mask = k0; // The comparison itself is not being masked.
22743     bool merge = false;
22744     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22745 
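    // As in evcmpFD64 above: compare into ktmp, then materialize 0/-1 lanes
    // with a masked load of all-ones (merge == false zeroes unselected lanes).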
22746     switch (src1_elem_bt) {
22747       case T_INT: {
22748         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22749         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22750         break;
22751       }
22752       case T_LONG: {
22753         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22754         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22755         break;
22756       }
22757       default: assert(false, "%s", type2name(src1_elem_bt));
22758     }
22759   %}
22760   ins_pipe( pipe_slow );
22761 %}
22762 
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22765   predicate(n->bottom_type()->isa_vectmask() &&
22766             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22767   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22768   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22769   ins_encode %{
22770     assert(UseAVX > 2, "required");
22771     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22772 
22773     int vlen_enc = vector_length_encoding(this, $src1);
22774     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22775     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22776     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22777 
    // Compare straight into the destination mask register; no vector expansion is needed.
22779     switch (src1_elem_bt) {
22780       case T_BYTE: {
22781         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22782         break;
22783       }
22784       case T_SHORT: {
22785         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22786         break;
22787       }
22788       case T_INT: {
22789         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22790         break;
22791       }
22792       case T_LONG: {
22793         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22794         break;
22795       }
22796       default: assert(false, "%s", type2name(src1_elem_bt));
22797     }
22798   %}
22799   ins_pipe( pipe_slow );
22800 %}
22801 
22802 // Extract
22803 
22804 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22805   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22806   match(Set dst (ExtractI src idx));
22807   match(Set dst (ExtractS src idx));
22808   match(Set dst (ExtractB src idx));
22809   format %{ "extractI $dst,$src,$idx\t!" %}
22810   ins_encode %{
22811     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22812 
22813     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22814     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22815   %}
22816   ins_pipe( pipe_slow );
22817 %}
22818 
22819 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22820   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22821             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22822   match(Set dst (ExtractI src idx));
22823   match(Set dst (ExtractS src idx));
22824   match(Set dst (ExtractB src idx));
22825   effect(TEMP vtmp);
22826   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22827   ins_encode %{
22828     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22829 
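    // Locate the 128-bit lane that holds the element (extracting it into vtmp
    // when needed), then pull the element out of that lane.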
22830     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22831     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22832     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22833   %}
22834   ins_pipe( pipe_slow );
22835 %}
22836 
22837 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22838   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22839   match(Set dst (ExtractL src idx));
22840   format %{ "extractL $dst,$src,$idx\t!" %}
22841   ins_encode %{
22842     assert(UseSSE >= 4, "required");
22843     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22844 
22845     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22846   %}
22847   ins_pipe( pipe_slow );
22848 %}
22849 
22850 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22851   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22852             Matcher::vector_length(n->in(1)) == 8);  // src
22853   match(Set dst (ExtractL src idx));
22854   effect(TEMP vtmp);
22855   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22856   ins_encode %{
22857     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22858 
22859     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22860     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22861   %}
22862   ins_pipe( pipe_slow );
22863 %}
22864 
22865 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22866   predicate(Matcher::vector_length(n->in(1)) <= 4);
22867   match(Set dst (ExtractF src idx));
22868   effect(TEMP dst, TEMP vtmp);
22869   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22870   ins_encode %{
22871     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22872 
22873     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22874   %}
22875   ins_pipe( pipe_slow );
22876 %}
22877 
22878 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22879   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22880             Matcher::vector_length(n->in(1)/*src*/) == 16);
22881   match(Set dst (ExtractF src idx));
22882   effect(TEMP vtmp);
22883   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22884   ins_encode %{
22885     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22886 
22887     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22888     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22889   %}
22890   ins_pipe( pipe_slow );
22891 %}
22892 
22893 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22894   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22895   match(Set dst (ExtractD src idx));
22896   format %{ "extractD $dst,$src,$idx\t!" %}
22897   ins_encode %{
22898     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22899 
22900     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22901   %}
22902   ins_pipe( pipe_slow );
22903 %}
22904 
22905 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22906   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22907             Matcher::vector_length(n->in(1)) == 8);  // src
22908   match(Set dst (ExtractD src idx));
22909   effect(TEMP vtmp);
22910   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22911   ins_encode %{
22912     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22913 
22914     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22915     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22916   %}
22917   ins_pipe( pipe_slow );
22918 %}
22919 
22920 // --------------------------------- Vector Blend --------------------------------------
22921 
22922 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22923   predicate(UseAVX == 0);
22924   match(Set dst (VectorBlend (Binary dst src) mask));
22925   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22926   effect(TEMP tmp);
22927   ins_encode %{
22928     assert(UseSSE >= 4, "required");
22929 
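    // SSE4.1 pblendvb reads its mask implicitly from xmm0, which is why tmp is
    // pinned to rxmm0; copy the mask there first unless it is already in place.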
22930     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22931       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22932     }
22933     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22934   %}
22935   ins_pipe( pipe_slow );
22936 %}
22937 
22938 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22939   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22940             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22941             Matcher::vector_length_in_bytes(n) <= 32 &&
22942             is_integral_type(Matcher::vector_element_basic_type(n)));
22943   match(Set dst (VectorBlend (Binary src1 src2) mask));
22944   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22945   ins_encode %{
22946     int vlen_enc = vector_length_encoding(this);
22947     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22948   %}
22949   ins_pipe( pipe_slow );
22950 %}
22951 
22952 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22953   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22954             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22955             Matcher::vector_length_in_bytes(n) <= 32 &&
22956             !is_integral_type(Matcher::vector_element_basic_type(n)));
22957   match(Set dst (VectorBlend (Binary src1 src2) mask));
22958   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22959   ins_encode %{
22960     int vlen_enc = vector_length_encoding(this);
22961     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22962   %}
22963   ins_pipe( pipe_slow );
22964 %}
22965 
22966 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22967   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22968             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22969             Matcher::vector_length_in_bytes(n) <= 32);
22970   match(Set dst (VectorBlend (Binary src1 src2) mask));
22971   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22972   effect(TEMP vtmp, TEMP dst);
22973   ins_encode %{
22974     int vlen_enc = vector_length_encoding(this);
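    // Blend computed as (mask & src2) | (~mask & src1); the EnableX86ECoreOpts
    // tuning prefers this and/andn/or sequence over vpblendvb, presumably
    // because the latter is slow on E-cores.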
22975     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22976     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22977     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22978   %}
22979   ins_pipe( pipe_slow );
22980 %}
22981 
22982 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22983   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22984             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22985   match(Set dst (VectorBlend (Binary src1 src2) mask));
22986   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22987   effect(TEMP ktmp);
22988   ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
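    // Rebuild a k-register mask from the vector mask (lanes equal to all-ones),
    // then perform the blend as a masked operation.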
22991     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22992     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22993   %}
22994   ins_pipe( pipe_slow );
22995 %}
22996 
instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22999   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
23000             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23001              VM_Version::supports_avx512bw()));
23002   match(Set dst (VectorBlend (Binary src1 src2) mask));
23003   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23004   ins_encode %{
23005     int vlen_enc = vector_length_encoding(this);
23006     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23007     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23008   %}
23009   ins_pipe( pipe_slow );
23010 %}
23011 
23012 // --------------------------------- ABS --------------------------------------
23013 // a = |a|
23014 instruct vabsB_reg(vec dst, vec src) %{
23015   match(Set dst (AbsVB  src));
23016   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23017   ins_encode %{
23018     uint vlen = Matcher::vector_length(this);
23019     if (vlen <= 16) {
23020       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23021     } else {
23022       int vlen_enc = vector_length_encoding(this);
23023       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23024     }
23025   %}
23026   ins_pipe( pipe_slow );
23027 %}
23028 
23029 instruct vabsS_reg(vec dst, vec src) %{
23030   match(Set dst (AbsVS  src));
23031   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23032   ins_encode %{
23033     uint vlen = Matcher::vector_length(this);
23034     if (vlen <= 8) {
23035       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23036     } else {
23037       int vlen_enc = vector_length_encoding(this);
23038       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23039     }
23040   %}
23041   ins_pipe( pipe_slow );
23042 %}
23043 
23044 instruct vabsI_reg(vec dst, vec src) %{
23045   match(Set dst (AbsVI  src));
23046   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23047   ins_encode %{
23048     uint vlen = Matcher::vector_length(this);
23049     if (vlen <= 4) {
23050       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23051     } else {
23052       int vlen_enc = vector_length_encoding(this);
23053       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23054     }
23055   %}
23056   ins_pipe( pipe_slow );
23057 %}
23058 
23059 instruct vabsL_reg(vec dst, vec src) %{
23060   match(Set dst (AbsVL  src));
23061   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23062   ins_encode %{
23063     assert(UseAVX > 2, "required");
23064     int vlen_enc = vector_length_encoding(this);
23065     if (!VM_Version::supports_avx512vl()) {
23066       vlen_enc = Assembler::AVX_512bit;
23067     }
23068     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23069   %}
23070   ins_pipe( pipe_slow );
23071 %}
23072 
23073 // --------------------------------- ABSNEG --------------------------------------
23074 
23075 instruct vabsnegF(vec dst, vec src) %{
23076   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23077   match(Set dst (AbsVF src));
23078   match(Set dst (NegVF src));
23079   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23080   ins_cost(150);
23081   ins_encode %{
23082     int opcode = this->ideal_Opcode();
23083     int vlen = Matcher::vector_length(this);
23084     if (vlen == 2) {
23085       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23086     } else {
23087       assert(vlen == 8 || vlen == 16, "required");
23088       int vlen_enc = vector_length_encoding(this);
23089       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23090     }
23091   %}
23092   ins_pipe( pipe_slow );
23093 %}
23094 
23095 instruct vabsneg4F(vec dst) %{
23096   predicate(Matcher::vector_length(n) == 4);
23097   match(Set dst (AbsVF dst));
23098   match(Set dst (NegVF dst));
23099   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23100   ins_cost(150);
23101   ins_encode %{
23102     int opcode = this->ideal_Opcode();
23103     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23104   %}
23105   ins_pipe( pipe_slow );
23106 %}
23107 
23108 instruct vabsnegD(vec dst, vec src) %{
23109   match(Set dst (AbsVD  src));
23110   match(Set dst (NegVD  src));
23111   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23112   ins_encode %{
23113     int opcode = this->ideal_Opcode();
23114     uint vlen = Matcher::vector_length(this);
23115     if (vlen == 2) {
23116       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23117     } else {
23118       int vlen_enc = vector_length_encoding(this);
23119       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23120     }
23121   %}
23122   ins_pipe( pipe_slow );
23123 %}
23124 
23125 //------------------------------------- VectorTest --------------------------------------------
23126 
23127 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23128   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23129   match(Set cr (VectorTest src1 src2));
23130   effect(TEMP vtmp);
23131   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
23132   ins_encode %{
23133     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23134     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23135     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23136   %}
23137   ins_pipe( pipe_slow );
23138 %}
23139 
23140 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23141   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23142   match(Set cr (VectorTest src1 src2));
23143   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23144   ins_encode %{
23145     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23146     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23147     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23148   %}
23149   ins_pipe( pipe_slow );
23150 %}
23151 
23152 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23153   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23154              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23155             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23156   match(Set cr (VectorTest src1 src2));
23157   effect(TEMP tmp);
23158   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23159   ins_encode %{
23160     uint masklen = Matcher::vector_length(this, $src1);
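    // Keep only the live mask bits and compare against the all-ones pattern.
    // E.g. masklen == 4: (1 << 4) - 1 == 0xF, so the cmpl below sets ZF
    // exactly when all four lanes are true.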
23161     __ kmovwl($tmp$$Register, $src1$$KRegister);
23162     __ andl($tmp$$Register, (1 << masklen) - 1);
23163     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23164   %}
23165   ins_pipe( pipe_slow );
23166 %}
23167 
23168 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23169   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23170              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23171             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23172   match(Set cr (VectorTest src1 src2));
23173   effect(TEMP tmp);
23174   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23175   ins_encode %{
23176     uint masklen = Matcher::vector_length(this, $src1);
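    // Mask off the dead upper bits; the andl leaves ZF clear exactly when
    // at least one of the low masklen lanes is true.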
23177     __ kmovwl($tmp$$Register, $src1$$KRegister);
23178     __ andl($tmp$$Register, (1 << masklen) - 1);
23179   %}
23180   ins_pipe( pipe_slow );
23181 %}
23182 
23183 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23184   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23185             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23186   match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8  $src1, $src2" %}
23188   ins_encode %{
23189     uint masklen = Matcher::vector_length(this, $src1);
23190     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23191   %}
23192   ins_pipe( pipe_slow );
23193 %}
23194 
23195 //------------------------------------- LoadMask --------------------------------------------
23196 
23197 instruct loadMask(legVec dst, legVec src) %{
23198   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23199   match(Set dst (VectorLoadMask src));
23200   effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src" %}
23202   ins_encode %{
23203     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23204     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23205     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23206   %}
23207   ins_pipe( pipe_slow );
23208 %}
23209 
23210 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23211   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23212   match(Set dst (VectorLoadMask src));
23213   effect(TEMP xtmp);
23214   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23215   ins_encode %{
23216     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23217                         true, Assembler::AVX_512bit);
23218   %}
23219   ins_pipe( pipe_slow );
23220 %}
23221 
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23223   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23224   match(Set dst (VectorLoadMask src));
23225   effect(TEMP xtmp);
23226   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23227   ins_encode %{
23228     int vlen_enc = vector_length_encoding(in(1));
23229     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23230                         false, vlen_enc);
23231   %}
23232   ins_pipe( pipe_slow );
23233 %}
23234 
23235 //------------------------------------- StoreMask --------------------------------------------
23236 
23237 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23238   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23239   match(Set dst (VectorStoreMask src size));
23240   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23241   ins_encode %{
23242     int vlen = Matcher::vector_length(this);
23243     if (vlen <= 16 && UseAVX <= 2) {
23244       assert(UseSSE >= 3, "required");
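      // pabsb maps each 0xFF (true) lane to 0x01 and leaves 0x00 alone,
      // producing the boolean byte vector in a single instruction.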
23245       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23246     } else {
23247       assert(UseAVX > 0, "required");
23248       int src_vlen_enc = vector_length_encoding(this, $src);
23249       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23250     }
23251   %}
23252   ins_pipe( pipe_slow );
23253 %}
23254 
23255 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23256   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23257   match(Set dst (VectorStoreMask src size));
23258   effect(TEMP_DEF dst, TEMP xtmp);
23259   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23260   ins_encode %{
23261     int vlen_enc = Assembler::AVX_128bit;
23262     int vlen = Matcher::vector_length(this);
23263     if (vlen <= 8) {
23264       assert(UseSSE >= 3, "required");
23265       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23266       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23267       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23268     } else {
23269       assert(UseAVX > 0, "required");
23270       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23271       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23272       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23273     }
23274   %}
23275   ins_pipe( pipe_slow );
23276 %}
23277 
23278 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23279   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23280   match(Set dst (VectorStoreMask src size));
23281   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23282   effect(TEMP_DEF dst, TEMP xtmp);
23283   ins_encode %{
23284     int vlen_enc = Assembler::AVX_128bit;
23285     int vlen = Matcher::vector_length(this);
23286     if (vlen <= 4) {
23287       assert(UseSSE >= 3, "required");
23288       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23289       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23290       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23291       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23292     } else {
23293       assert(UseAVX > 0, "required");
23294       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23295       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23296       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23297       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23298       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23299     }
23300   %}
23301   ins_pipe( pipe_slow );
23302 %}
23303 
23304 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23305   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23306   match(Set dst (VectorStoreMask src size));
23307   effect(TEMP_DEF dst, TEMP xtmp);
23308   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23309   ins_encode %{
23310     assert(UseSSE >= 3, "required");
23311     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23312     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23313     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23314     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23315     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23316   %}
23317   ins_pipe( pipe_slow );
23318 %}
23319 
23320 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23321   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23322   match(Set dst (VectorStoreMask src size));
23323   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23324   effect(TEMP_DEF dst, TEMP vtmp);
23325   ins_encode %{
23326     int vlen_enc = Assembler::AVX_128bit;
23327     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23328     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23329     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23330     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23331     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23332     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23333     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23334   %}
23335   ins_pipe( pipe_slow );
23336 %}
23337 
23338 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23339   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23340   match(Set dst (VectorStoreMask src size));
23341   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23342   ins_encode %{
23343     int src_vlen_enc = vector_length_encoding(this, $src);
23344     int dst_vlen_enc = vector_length_encoding(this);
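    // Without AVX512VL the EVEX down-convert only exists at 512-bit width,
    // so promote the source encoding; the destination length is unchanged.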
23345     if (!VM_Version::supports_avx512vl()) {
23346       src_vlen_enc = Assembler::AVX_512bit;
23347     }
23348     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23349     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23350   %}
23351   ins_pipe( pipe_slow );
23352 %}
23353 
23354 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23355   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23356   match(Set dst (VectorStoreMask src size));
23357   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23358   ins_encode %{
23359     int src_vlen_enc = vector_length_encoding(this, $src);
23360     int dst_vlen_enc = vector_length_encoding(this);
23361     if (!VM_Version::supports_avx512vl()) {
23362       src_vlen_enc = Assembler::AVX_512bit;
23363     }
23364     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23365     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23366   %}
23367   ins_pipe( pipe_slow );
23368 %}
23369 
23370 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23371   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23372   match(Set dst (VectorStoreMask mask size));
23373   effect(TEMP_DEF dst);
23374   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23375   ins_encode %{
23376     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23377     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23378                  false, Assembler::AVX_512bit, noreg);
23379     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23380   %}
23381   ins_pipe( pipe_slow );
23382 %}
23383 
23384 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23385   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23386   match(Set dst (VectorStoreMask mask size));
23387   effect(TEMP_DEF dst);
23388   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23389   ins_encode %{
23390     int dst_vlen_enc = vector_length_encoding(this);
23391     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23392     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23393   %}
23394   ins_pipe( pipe_slow );
23395 %}
23396 
23397 instruct vmaskcast_evex(kReg dst) %{
23398   match(Set dst (VectorMaskCast dst));
23399   ins_cost(0);
23400   format %{ "vector_mask_cast $dst" %}
23401   ins_encode %{
23402     // empty
23403   %}
23404   ins_pipe(empty);
23405 %}
23406 
23407 instruct vmaskcast(vec dst) %{
23408   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23409   match(Set dst (VectorMaskCast dst));
23410   ins_cost(0);
23411   format %{ "vector_mask_cast $dst" %}
23412   ins_encode %{
23413     // empty
23414   %}
23415   ins_pipe(empty);
23416 %}
23417 
23418 instruct vmaskcast_avx(vec dst, vec src) %{
23419   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23420   match(Set dst (VectorMaskCast src));
23421   format %{ "vector_mask_cast $dst, $src" %}
23422   ins_encode %{
23423     int vlen = Matcher::vector_length(this);
23424     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23425     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23426     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23427   %}
23428   ins_pipe(pipe_slow);
23429 %}
23430 
23431 //-------------------------------- Load Iota Indices ----------------------------------
23432 
23433 instruct loadIotaIndices(vec dst, immI_0 src) %{
23434   match(Set dst (VectorLoadConst src));
23435   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23436   ins_encode %{
23437      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23438      BasicType bt = Matcher::vector_element_basic_type(this);
23439      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23440   %}
23441   ins_pipe( pipe_slow );
23442 %}
23443 
23444 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23445   match(Set dst (PopulateIndex src1 src2));
23446   effect(TEMP dst, TEMP vtmp);
23447   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23448   ins_encode %{
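     // index[i] = src1 + i (step src2 == 1): broadcast the scalar start
     // value, then add the iota constant {0, 1, 2, ...} loaded below.
     // The long variant below follows the same pattern.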
23449      assert($src2$$constant == 1, "required");
23450      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23451      int vlen_enc = vector_length_encoding(this);
23452      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23453      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23454      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23455      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23456   %}
23457   ins_pipe( pipe_slow );
23458 %}
23459 
23460 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23461   match(Set dst (PopulateIndex src1 src2));
23462   effect(TEMP dst, TEMP vtmp);
23463   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23464   ins_encode %{
23465      assert($src2$$constant == 1, "required");
23466      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23467      int vlen_enc = vector_length_encoding(this);
23468      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23469      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23470      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23471      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23472   %}
23473   ins_pipe( pipe_slow );
23474 %}
23475 
23476 //-------------------------------- Rearrange ----------------------------------
23477 
23478 // LoadShuffle/Rearrange for Byte
23479 instruct rearrangeB(vec dst, vec shuffle) %{
23480   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23481             Matcher::vector_length(n) < 32);
23482   match(Set dst (VectorRearrange dst shuffle));
23483   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23484   ins_encode %{
23485     assert(UseSSE >= 4, "required");
23486     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23487   %}
23488   ins_pipe( pipe_slow );
23489 %}
23490 
23491 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23492   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23493             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23494   match(Set dst (VectorRearrange src shuffle));
23495   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23496   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23497   ins_encode %{
23498     assert(UseAVX >= 2, "required");
23499     // Swap src into vtmp1
23500     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
23506     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23507     // Perform the blend
23508     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23509   %}
23510   ins_pipe( pipe_slow );
23511 %}
23512 
23513 
23514 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23515   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23516             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23517   match(Set dst (VectorRearrange src shuffle));
23518   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23520   ins_encode %{
23521     int vlen_enc = vector_length_encoding(this);
23522     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23523                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23524                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23525   %}
23526   ins_pipe( pipe_slow );
23527 %}
23528 
23529 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23530   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23531             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23532   match(Set dst (VectorRearrange src shuffle));
23533   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23534   ins_encode %{
23535     int vlen_enc = vector_length_encoding(this);
23536     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23537   %}
23538   ins_pipe( pipe_slow );
23539 %}
23540 
23541 // LoadShuffle/Rearrange for Short
23542 
23543 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23544   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23545             !VM_Version::supports_avx512bw());
23546   match(Set dst (VectorLoadShuffle src));
23547   effect(TEMP dst, TEMP vtmp);
23548   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23549   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23552     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23553     if (UseAVX == 0) {
23554       assert(vlen_in_bytes <= 16, "required");
23555       // Multiply each shuffle by two to get byte index
23556       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23557       __ psllw($vtmp$$XMMRegister, 1);
23558 
23559       // Duplicate to create 2 copies of byte index
23560       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23561       __ psllw($dst$$XMMRegister, 8);
23562       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23563 
23564       // Add one to get alternate byte index
23565       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23566       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23567     } else {
23568       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23569       int vlen_enc = vector_length_encoding(this);
23570       // Multiply each shuffle by two to get byte index
23571       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23572 
23573       // Duplicate to create 2 copies of byte index
23574       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23575       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23576 
23577       // Add one to get alternate byte index
23578       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23579     }
23580   %}
23581   ins_pipe( pipe_slow );
23582 %}
23583 
23584 instruct rearrangeS(vec dst, vec shuffle) %{
23585   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23586             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23587   match(Set dst (VectorRearrange dst shuffle));
23588   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23589   ins_encode %{
23590     assert(UseSSE >= 4, "required");
23591     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23592   %}
23593   ins_pipe( pipe_slow );
23594 %}
23595 
23596 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23597   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23598             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23599   match(Set dst (VectorRearrange src shuffle));
23600   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23601   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23602   ins_encode %{
23603     assert(UseAVX >= 2, "required");
23604     // Swap src into vtmp1
23605     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
23611     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23612     // Perform the blend
23613     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23614   %}
23615   ins_pipe( pipe_slow );
23616 %}
23617 
23618 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23619   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23620             VM_Version::supports_avx512bw());
23621   match(Set dst (VectorRearrange src shuffle));
23622   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23623   ins_encode %{
23624     int vlen_enc = vector_length_encoding(this);
23625     if (!VM_Version::supports_avx512vl()) {
23626       vlen_enc = Assembler::AVX_512bit;
23627     }
23628     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23629   %}
23630   ins_pipe( pipe_slow );
23631 %}
23632 
23633 // LoadShuffle/Rearrange for Integer and Float
23634 
23635 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23636   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23637             Matcher::vector_length(n) == 4 && UseAVX == 0);
23638   match(Set dst (VectorLoadShuffle src));
23639   effect(TEMP dst, TEMP vtmp);
23640   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23641   ins_encode %{
23642     assert(UseSSE >= 4, "required");
23643 
    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23646 
23647     // Duplicate and multiply each shuffle by 4
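    // 0xA0 selects words {0,0,2,2} (binary 10'10'00'00), so pshuflw/pshufhw
    // copy the low word of every dword into both of its word slots.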
23648     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23649     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23650     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23651     __ psllw($vtmp$$XMMRegister, 2);
23652 
23653     // Duplicate again to create 4 copies of byte index
23654     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23655     __ psllw($dst$$XMMRegister, 8);
23656     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23657 
23658     // Add 3,2,1,0 to get alternate byte index
23659     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23660     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23661   %}
23662   ins_pipe( pipe_slow );
23663 %}
23664 
23665 instruct rearrangeI(vec dst, vec shuffle) %{
23666   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23667             UseAVX == 0);
23668   match(Set dst (VectorRearrange dst shuffle));
23669   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23670   ins_encode %{
23671     assert(UseSSE >= 4, "required");
23672     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23673   %}
23674   ins_pipe( pipe_slow );
23675 %}
23676 
23677 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23678   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23679             UseAVX > 0);
23680   match(Set dst (VectorRearrange src shuffle));
23681   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23682   ins_encode %{
23683     int vlen_enc = vector_length_encoding(this);
23684     BasicType bt = Matcher::vector_element_basic_type(this);
23685     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23686   %}
23687   ins_pipe( pipe_slow );
23688 %}
23689 
23690 // LoadShuffle/Rearrange for Long and Double
23691 
23692 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23693   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23694             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23695   match(Set dst (VectorLoadShuffle src));
23696   effect(TEMP dst, TEMP vtmp);
23697   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23698   ins_encode %{
23699     assert(UseAVX >= 2, "required");
23700 
23701     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.
23704 
23705     // Multiply each shuffle by two to get double word index
23706     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23707 
23708     // Duplicate each double word shuffle
23709     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23710     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23711 
23712     // Add one to get alternate double word index
23713     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23714   %}
23715   ins_pipe( pipe_slow );
23716 %}
23717 
23718 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23719   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23720             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23721   match(Set dst (VectorRearrange src shuffle));
23722   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23723   ins_encode %{
23724     assert(UseAVX >= 2, "required");
23725 
23726     int vlen_enc = vector_length_encoding(this);
23727     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23728   %}
23729   ins_pipe( pipe_slow );
23730 %}
23731 
23732 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23733   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23734             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23735   match(Set dst (VectorRearrange src shuffle));
23736   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23737   ins_encode %{
23738     assert(UseAVX > 2, "required");
23739 
23740     int vlen_enc = vector_length_encoding(this);
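    // vpermq needs at least a 256-bit encoding; widen the 128-bit case
    // (the extra upper lanes are irrelevant to the 128-bit consumer).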
23741     if (vlen_enc == Assembler::AVX_128bit) {
23742       vlen_enc = Assembler::AVX_256bit;
23743     }
23744     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23745   %}
23746   ins_pipe( pipe_slow );
23747 %}
23748 
23749 // --------------------------------- FMA --------------------------------------
23750 // a * b + c
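// The fused sequences round once: the a * b product is not rounded before
// c is added, matching the Math.fma contract.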
23751 
23752 instruct vfmaF_reg(vec a, vec b, vec c) %{
23753   match(Set c (FmaVF  c (Binary a b)));
23754   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23755   ins_cost(150);
23756   ins_encode %{
23757     assert(UseFMA, "not enabled");
23758     int vlen_enc = vector_length_encoding(this);
23759     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23760   %}
23761   ins_pipe( pipe_slow );
23762 %}
23763 
23764 instruct vfmaF_mem(vec a, memory b, vec c) %{
23765   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23766   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23767   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23768   ins_cost(150);
23769   ins_encode %{
23770     assert(UseFMA, "not enabled");
23771     int vlen_enc = vector_length_encoding(this);
23772     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23773   %}
23774   ins_pipe( pipe_slow );
23775 %}
23776 
23777 instruct vfmaD_reg(vec a, vec b, vec c) %{
23778   match(Set c (FmaVD  c (Binary a b)));
23779   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23780   ins_cost(150);
23781   ins_encode %{
23782     assert(UseFMA, "not enabled");
23783     int vlen_enc = vector_length_encoding(this);
23784     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23785   %}
23786   ins_pipe( pipe_slow );
23787 %}
23788 
23789 instruct vfmaD_mem(vec a, memory b, vec c) %{
23790   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23791   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23792   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23793   ins_cost(150);
23794   ins_encode %{
23795     assert(UseFMA, "not enabled");
23796     int vlen_enc = vector_length_encoding(this);
23797     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23798   %}
23799   ins_pipe( pipe_slow );
23800 %}
23801 
23802 // --------------------------------- Vector Multiply Add --------------------------------------
23803 
23804 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23805   predicate(UseAVX == 0);
23806   match(Set dst (MulAddVS2VI dst src1));
23807   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23808   ins_encode %{
23809     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23810   %}
23811   ins_pipe( pipe_slow );
23812 %}
23813 
23814 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23815   predicate(UseAVX > 0);
23816   match(Set dst (MulAddVS2VI src1 src2));
23817   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23818   ins_encode %{
23819     int vlen_enc = vector_length_encoding(this);
23820     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23821   %}
23822   ins_pipe( pipe_slow );
23823 %}
23824 
23825 // --------------------------------- Vector Multiply Add Add ----------------------------------
23826 
23827 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23828   predicate(VM_Version::supports_avx512_vnni());
23829   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23830   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23831   ins_encode %{
23832     assert(UseAVX > 2, "required");
23833     int vlen_enc = vector_length_encoding(this);
23834     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23835   %}
  ins_cost(10);
  ins_pipe( pipe_slow );
23838 %}
23839 
23840 // --------------------------------- PopCount --------------------------------------
23841 
23842 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23843   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23844   match(Set dst (PopCountVI src));
23845   match(Set dst (PopCountVL src));
23846   format %{ "vector_popcount_integral $dst, $src" %}
23847   ins_encode %{
23849     int vlen_enc = vector_length_encoding(this, $src);
23850     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23851     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23852   %}
23853   ins_pipe( pipe_slow );
23854 %}
23855 
23856 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23857   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23858   match(Set dst (PopCountVI src mask));
23859   match(Set dst (PopCountVL src mask));
23860   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23861   ins_encode %{
23862     int vlen_enc = vector_length_encoding(this, $src);
23863     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23864     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23865     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23866   %}
23867   ins_pipe( pipe_slow );
23868 %}
23869 
23870 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23871   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23872   match(Set dst (PopCountVI src));
23873   match(Set dst (PopCountVL src));
23874   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23875   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23876   ins_encode %{
23878     int vlen_enc = vector_length_encoding(this, $src);
23879     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23880     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23881                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23882   %}
23883   ins_pipe( pipe_slow );
23884 %}
23885 
23886 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23887 
23888 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23889   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23890                                               Matcher::vector_length_in_bytes(n->in(1))));
23891   match(Set dst (CountTrailingZerosV src));
23892   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23893   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23895   ins_encode %{
23896     int vlen_enc = vector_length_encoding(this, $src);
23897     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23898     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23899                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23900   %}
23901   ins_pipe( pipe_slow );
23902 %}
23903 
23904 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23905   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23906             VM_Version::supports_avx512cd() &&
23907             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23908   match(Set dst (CountTrailingZerosV src));
23909   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23910   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23912   ins_encode %{
23913     int vlen_enc = vector_length_encoding(this, $src);
23914     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23915     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23916                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23917   %}
23918   ins_pipe( pipe_slow );
23919 %}
23920 
23921 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23922   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23923   match(Set dst (CountTrailingZerosV src));
23924   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23925   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23927   ins_encode %{
23928     int vlen_enc = vector_length_encoding(this, $src);
23929     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23930     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23931                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23932                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23933   %}
23934   ins_pipe( pipe_slow );
23935 %}
23936 
23937 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23938   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23939   match(Set dst (CountTrailingZerosV src));
23940   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23941   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23942   ins_encode %{
23943     int vlen_enc = vector_length_encoding(this, $src);
23944     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23945     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23946                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23947   %}
23948   ins_pipe( pipe_slow );
23949 %}
23950 
23951 
23952 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23953 
23954 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23955   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23956   effect(TEMP dst);
23957   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23958   ins_encode %{
23959     int vector_len = vector_length_encoding(this);
23960     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23961   %}
23962   ins_pipe( pipe_slow );
23963 %}
23964 
23965 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23966   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23967   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23968   effect(TEMP dst);
23969   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23970   ins_encode %{
23971     int vector_len = vector_length_encoding(this);
23972     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23973   %}
23974   ins_pipe( pipe_slow );
23975 %}
23976 
23977 // --------------------------------- Rotation Operations ----------------------------------
23978 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23979   match(Set dst (RotateLeftV src shift));
23980   match(Set dst (RotateRightV src shift));
23981   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23982   ins_encode %{
23983     int opcode      = this->ideal_Opcode();
23984     int vector_len  = vector_length_encoding(this);
23985     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23986     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23987   %}
23988   ins_pipe( pipe_slow );
23989 %}
23990 
23991 instruct vprorate(vec dst, vec src, vec shift) %{
23992   match(Set dst (RotateLeftV src shift));
23993   match(Set dst (RotateRightV src shift));
23994   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23995   ins_encode %{
23996     int opcode      = this->ideal_Opcode();
23997     int vector_len  = vector_length_encoding(this);
23998     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23999     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
24000   %}
24001   ins_pipe( pipe_slow );
24002 %}
24003 
24004 // ---------------------------------- Masked Operations ------------------------------------
24005 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24006   predicate(!n->in(3)->bottom_type()->isa_vectmask());
24007   match(Set dst (LoadVectorMasked mem mask));
24008   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24009   ins_encode %{
24010     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24011     int vlen_enc = vector_length_encoding(this);
24012     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24013   %}
24014   ins_pipe( pipe_slow );
24015 %}
24016 
24017 
24018 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24019   predicate(n->in(3)->bottom_type()->isa_vectmask());
24020   match(Set dst (LoadVectorMasked mem mask));
24021   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24022   ins_encode %{
24023     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
24024     int vector_len = vector_length_encoding(this);
24025     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24026   %}
24027   ins_pipe( pipe_slow );
24028 %}
24029 
24030 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24031   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24032   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24033   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24034   ins_encode %{
24035     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24036     int vlen_enc = vector_length_encoding(src_node);
24037     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24038     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24039   %}
24040   ins_pipe( pipe_slow );
24041 %}
24042 
24043 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24044   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24045   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24046   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24047   ins_encode %{
24048     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24049     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24050     int vlen_enc = vector_length_encoding(src_node);
24051     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24052   %}
24053   ins_pipe( pipe_slow );
24054 %}
24055 
24056 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24057   match(Set addr (VerifyVectorAlignment addr mask));
24058   effect(KILL cr);
24059   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24060   ins_encode %{
24061     Label Lskip;
24062     // check if masked bits of addr are zero
24063     __ testq($addr$$Register, $mask$$constant);
24064     __ jccb(Assembler::equal, Lskip);
24065     __ stop("verify_vector_alignment found a misaligned vector memory access");
24066     __ bind(Lskip);
24067   %}
24068   ins_pipe(pipe_slow);
24069 %}
24070 
24071 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24072   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24073   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24074   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24075   ins_encode %{
24076     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24077     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
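    // Result protocol (a sketch): dst stays -1 when every masked lane
    // compares equal (kortest sets CF when ktmp1 | ~mask is all ones);
    // otherwise dst becomes the index of the first non-matching lane via
    // tzcnt. The mask is expected to be a prefix mask.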
24078 
24079     Label DONE;
24080     int vlen_enc = vector_length_encoding(this, $src1);
24081     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24082 
24083     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24084     __ mov64($dst$$Register, -1L);
24085     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24086     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24087     __ jccb(Assembler::carrySet, DONE);
24088     __ kmovql($dst$$Register, $ktmp1$$KRegister);
24089     __ notq($dst$$Register);
24090     __ tzcntq($dst$$Register, $dst$$Register);
24091     __ bind(DONE);
24092   %}
24093   ins_pipe( pipe_slow );
24094 %}
24095 
24096 
24097 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24098   match(Set dst (VectorMaskGen len));
24099   effect(TEMP temp, KILL cr);
24100   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24101   ins_encode %{
24102     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24103   %}
24104   ins_pipe( pipe_slow );
24105 %}
24106 
24107 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24108   match(Set dst (VectorMaskGen len));
24109   format %{ "vector_mask_gen $len \t! vector mask generator" %}
24110   effect(TEMP temp);
24111   ins_encode %{
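    // Materialize the low $len set bits, e.g. len == 3:
    // 0xFFFFFFFFFFFFFFFF >> (64 - 3) == 0b111.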
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
24113     __ kmovql($dst$$KRegister, $temp$$Register);
24114   %}
24115   ins_pipe( pipe_slow );
24116 %}
24117 
24118 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24119   predicate(n->in(1)->bottom_type()->isa_vectmask());
24120   match(Set dst (VectorMaskToLong mask));
24121   effect(TEMP dst, KILL cr);
24122   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24123   ins_encode %{
24124     int opcode = this->ideal_Opcode();
24125     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24126     int mask_len = Matcher::vector_length(this, $mask);
24127     int mask_size = mask_len * type2aelembytes(mbt);
24128     int vlen_enc = vector_length_encoding(this, $mask);
24129     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24130                              $dst$$Register, mask_len, mask_size, vlen_enc);
24131   %}
24132   ins_pipe( pipe_slow );
24133 %}
24134 
24135 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24136   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24137   match(Set dst (VectorMaskToLong mask));
24138   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24139   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24140   ins_encode %{
24141     int opcode = this->ideal_Opcode();
24142     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24143     int mask_len = Matcher::vector_length(this, $mask);
24144     int vlen_enc = vector_length_encoding(this, $mask);
24145     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24146                              $dst$$Register, mask_len, mbt, vlen_enc);
24147   %}
24148   ins_pipe( pipe_slow );
24149 %}
24150 
24151 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24152   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24153   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24154   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24155   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24156   ins_encode %{
24157     int opcode = this->ideal_Opcode();
24158     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24159     int mask_len = Matcher::vector_length(this, $mask);
24160     int vlen_enc = vector_length_encoding(this, $mask);
24161     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24162                              $dst$$Register, mask_len, mbt, vlen_enc);
24163   %}
24164   ins_pipe( pipe_slow );
24165 %}
24166 
24167 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24168   predicate(n->in(1)->bottom_type()->isa_vectmask());
24169   match(Set dst (VectorMaskTrueCount mask));
24170   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24171   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24172   ins_encode %{
24173     int opcode = this->ideal_Opcode();
24174     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24175     int mask_len = Matcher::vector_length(this, $mask);
24176     int mask_size = mask_len * type2aelembytes(mbt);
24177     int vlen_enc = vector_length_encoding(this, $mask);
24178     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24179                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24180   %}
24181   ins_pipe( pipe_slow );
24182 %}
24183 
24184 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24185   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24186   match(Set dst (VectorMaskTrueCount mask));
24187   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24188   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24189   ins_encode %{
24190     int opcode = this->ideal_Opcode();
24191     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24192     int mask_len = Matcher::vector_length(this, $mask);
24193     int vlen_enc = vector_length_encoding(this, $mask);
24194     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24195                              $tmp$$Register, mask_len, mbt, vlen_enc);
24196   %}
24197   ins_pipe( pipe_slow );
24198 %}
24199 
24200 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24201   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24202   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24203   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24204   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24205   ins_encode %{
24206     int opcode = this->ideal_Opcode();
24207     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24208     int mask_len = Matcher::vector_length(this, $mask);
24209     int vlen_enc = vector_length_encoding(this, $mask);
24210     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24211                              $tmp$$Register, mask_len, mbt, vlen_enc);
24212   %}
24213   ins_pipe( pipe_slow );
24214 %}
24215 
24216 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24217   predicate(n->in(1)->bottom_type()->isa_vectmask());
24218   match(Set dst (VectorMaskFirstTrue mask));
24219   match(Set dst (VectorMaskLastTrue mask));
24220   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24221   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24222   ins_encode %{
24223     int opcode = this->ideal_Opcode();
24224     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24225     int mask_len = Matcher::vector_length(this, $mask);
24226     int mask_size = mask_len * type2aelembytes(mbt);
24227     int vlen_enc = vector_length_encoding(this, $mask);
24228     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24229                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24230   %}
24231   ins_pipe( pipe_slow );
24232 %}
24233 
24234 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24235   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24236   match(Set dst (VectorMaskFirstTrue mask));
24237   match(Set dst (VectorMaskLastTrue mask));
24238   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24239   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24240   ins_encode %{
24241     int opcode = this->ideal_Opcode();
24242     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24243     int mask_len = Matcher::vector_length(this, $mask);
24244     int vlen_enc = vector_length_encoding(this, $mask);
24245     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24246                              $tmp$$Register, mask_len, mbt, vlen_enc);
24247   %}
24248   ins_pipe( pipe_slow );
24249 %}
24250 
24251 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24252   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24253   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24254   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24255   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24256   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24257   ins_encode %{
24258     int opcode = this->ideal_Opcode();
24259     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24260     int mask_len = Matcher::vector_length(this, $mask);
24261     int vlen_enc = vector_length_encoding(this, $mask);
24262     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24263                              $tmp$$Register, mask_len, mbt, vlen_enc);
24264   %}
24265   ins_pipe( pipe_slow );
24266 %}
24267 
24268 // --------------------------------- Compress/Expand Operations ---------------------------
24269 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24270   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24271   match(Set dst (CompressV src mask));
24272   match(Set dst (ExpandV src mask));
24273   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24275   ins_encode %{
24276     int opcode = this->ideal_Opcode();
24277     int vlen_enc = vector_length_encoding(this);
24278     BasicType bt  = Matcher::vector_element_basic_type(this);
24279     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24280                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24281   %}
24282   ins_pipe( pipe_slow );
24283 %}
24284 
24285 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24286   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24287   match(Set dst (CompressV src mask));
24288   match(Set dst (ExpandV src mask));
24289   format %{ "vector_compress_expand $dst, $src, $mask" %}
24290   ins_encode %{
24291     int opcode = this->ideal_Opcode();
24292     int vector_len = vector_length_encoding(this);
24293     BasicType bt  = Matcher::vector_element_basic_type(this);
24294     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24295   %}
24296   ins_pipe( pipe_slow );
24297 %}
24298 
24299 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24300   match(Set dst (CompressM mask));
24301   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24302   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24303   ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "expected a vector mask");
24305     int mask_len = Matcher::vector_length(this);
24306     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24307   %}
24308   ins_pipe( pipe_slow );
24309 %}
24310 
24311 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24312 
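// Without GFNI, bit reversal is synthesized by a masm helper, which needs the extra
// temporaries below. With GFNI, one affine transform against the 0x8040201008040201
// matrix constant reverses the bits within each byte; lanes wider than a byte then
// only need their byte order reversed as well.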
24313 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24314   predicate(!VM_Version::supports_gfni());
24315   match(Set dst (ReverseV src));
24316   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24318   ins_encode %{
24319     int vec_enc = vector_length_encoding(this);
24320     BasicType bt = Matcher::vector_element_basic_type(this);
24321     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24322                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24323   %}
24324   ins_pipe( pipe_slow );
24325 %}
24326 
24327 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24328   predicate(VM_Version::supports_gfni());
24329   match(Set dst (ReverseV src));
24330   effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24332   ins_encode %{
24333     int vec_enc = vector_length_encoding(this);
24334     BasicType bt  = Matcher::vector_element_basic_type(this);
24335     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24336     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24337                                $xtmp$$XMMRegister);
24338   %}
24339   ins_pipe( pipe_slow );
24340 %}
24341 
24342 instruct vreverse_byte_reg(vec dst, vec src) %{
24343   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24344   match(Set dst (ReverseBytesV src));
24345   effect(TEMP dst);
24346   format %{ "vector_reverse_byte $dst, $src" %}
24347   ins_encode %{
24348     int vec_enc = vector_length_encoding(this);
24349     BasicType bt = Matcher::vector_element_basic_type(this);
24350     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24351   %}
24352   ins_pipe( pipe_slow );
24353 %}
24354 
24355 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24356   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24357   match(Set dst (ReverseBytesV src));
24358   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24360   ins_encode %{
24361     int vec_enc = vector_length_encoding(this);
24362     BasicType bt = Matcher::vector_element_basic_type(this);
24363     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24364                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24365   %}
24366   ins_pipe( pipe_slow );
24367 %}
24368 
24369 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24370 
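// For int/long lanes on EVEX targets, count-leading-zeros maps onto the AVX512CD
// vplzcnt{d,q} instructions, so those rules need no temporaries (xnoreg/k0/noreg are
// passed). Subword types and the AVX fallback are synthesized by masm helpers and
// therefore require XMM (and sometimes GPR/mask) temporaries.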
24371 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24372   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24373                                               Matcher::vector_length_in_bytes(n->in(1))));
24374   match(Set dst (CountLeadingZerosV src));
24375   format %{ "vector_count_leading_zeros $dst, $src" %}
24376   ins_encode %{
24377      int vlen_enc = vector_length_encoding(this, $src);
24378      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24379      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24380                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24381   %}
24382   ins_pipe( pipe_slow );
24383 %}
24384 
24385 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24386   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24387                                               Matcher::vector_length_in_bytes(n->in(1))));
24388   match(Set dst (CountLeadingZerosV src mask));
24389   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24390   ins_encode %{
24391     int vlen_enc = vector_length_encoding(this, $src);
24392     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24393     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $dst$$XMMRegister, xnoreg, xnoreg,
24395                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24396   %}
24397   ins_pipe( pipe_slow );
24398 %}
24399 
24400 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24401   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24402             VM_Version::supports_avx512cd() &&
24403             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24404   match(Set dst (CountLeadingZerosV src));
24405   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24407   ins_encode %{
24408     int vlen_enc = vector_length_encoding(this, $src);
24409     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24410     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24411                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24412   %}
24413   ins_pipe( pipe_slow );
24414 %}
24415 
24416 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24417   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24418   match(Set dst (CountLeadingZerosV src));
24419   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24421   ins_encode %{
24422     int vlen_enc = vector_length_encoding(this, $src);
24423     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24424     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24425                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24426                                        $rtmp$$Register, true, vlen_enc);
24427   %}
24428   ins_pipe( pipe_slow );
24429 %}
24430 
24431 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24432   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24433             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24434   match(Set dst (CountLeadingZerosV src));
24435   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24436   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24437   ins_encode %{
24438     int vlen_enc = vector_length_encoding(this, $src);
24439     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24440     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24441                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24442   %}
24443   ins_pipe( pipe_slow );
24444 %}
24445 
24446 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24447   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24448             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24449   match(Set dst (CountLeadingZerosV src));
24450   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24452   ins_encode %{
24453     int vlen_enc = vector_length_encoding(this, $src);
24454     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24455     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24456                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24457   %}
24458   ins_pipe( pipe_slow );
24459 %}
24460 
24461 // ---------------------------------- Vector Masked Operations ------------------------------------
24462 
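// The rules in this section share one shape: they match an ideal subtree of the form
// (Op (Binary dst src2) mask), where the mask lives in an AVX-512 opmask register (kReg),
// and defer to MacroAssembler::evmasked_op, which selects the EVEX encoding from the
// ideal opcode and element type. Passing 'true' for the merge argument requests
// merge-masking: destination lanes whose mask bit is clear keep their previous value.
// Illustrative only, in Java Vector API terms such a subtree comes from something like:
//   IntVector a = ...; VectorMask<Integer> m = ...;
//   a = a.lanewise(VectorOperators.ADD, b, m);  // -> (AddVI (Binary a b) m)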
24463 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24464   match(Set dst (AddVB (Binary dst src2) mask));
24465   match(Set dst (AddVS (Binary dst src2) mask));
24466   match(Set dst (AddVI (Binary dst src2) mask));
24467   match(Set dst (AddVL (Binary dst src2) mask));
24468   match(Set dst (AddVF (Binary dst src2) mask));
24469   match(Set dst (AddVD (Binary dst src2) mask));
24470   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24471   ins_encode %{
24472     int vlen_enc = vector_length_encoding(this);
24473     BasicType bt = Matcher::vector_element_basic_type(this);
24474     int opc = this->ideal_Opcode();
24475     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24476                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24477   %}
24478   ins_pipe( pipe_slow );
24479 %}
24480 
24481 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24482   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24483   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24484   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24485   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24486   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24487   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24488   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24489   ins_encode %{
24490     int vlen_enc = vector_length_encoding(this);
24491     BasicType bt = Matcher::vector_element_basic_type(this);
24492     int opc = this->ideal_Opcode();
24493     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24494                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24495   %}
24496   ins_pipe( pipe_slow );
24497 %}
24498 
24499 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24500   match(Set dst (XorV (Binary dst src2) mask));
24501   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24502   ins_encode %{
24503     int vlen_enc = vector_length_encoding(this);
24504     BasicType bt = Matcher::vector_element_basic_type(this);
24505     int opc = this->ideal_Opcode();
24506     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24507                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24508   %}
24509   ins_pipe( pipe_slow );
24510 %}
24511 
24512 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24513   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24514   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24515   ins_encode %{
24516     int vlen_enc = vector_length_encoding(this);
24517     BasicType bt = Matcher::vector_element_basic_type(this);
24518     int opc = this->ideal_Opcode();
24519     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24520                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24521   %}
24522   ins_pipe( pipe_slow );
24523 %}
24524 
24525 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24526   match(Set dst (OrV (Binary dst src2) mask));
24527   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24528   ins_encode %{
24529     int vlen_enc = vector_length_encoding(this);
24530     BasicType bt = Matcher::vector_element_basic_type(this);
24531     int opc = this->ideal_Opcode();
24532     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24533                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24534   %}
24535   ins_pipe( pipe_slow );
24536 %}
24537 
24538 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24539   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24540   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24541   ins_encode %{
24542     int vlen_enc = vector_length_encoding(this);
24543     BasicType bt = Matcher::vector_element_basic_type(this);
24544     int opc = this->ideal_Opcode();
24545     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24546                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24547   %}
24548   ins_pipe( pipe_slow );
24549 %}
24550 
24551 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24552   match(Set dst (AndV (Binary dst src2) mask));
24553   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24554   ins_encode %{
24555     int vlen_enc = vector_length_encoding(this);
24556     BasicType bt = Matcher::vector_element_basic_type(this);
24557     int opc = this->ideal_Opcode();
24558     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24559                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24560   %}
24561   ins_pipe( pipe_slow );
24562 %}
24563 
24564 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24565   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24566   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24567   ins_encode %{
24568     int vlen_enc = vector_length_encoding(this);
24569     BasicType bt = Matcher::vector_element_basic_type(this);
24570     int opc = this->ideal_Opcode();
24571     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24572                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24573   %}
24574   ins_pipe( pipe_slow );
24575 %}
24576 
24577 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24578   match(Set dst (SubVB (Binary dst src2) mask));
24579   match(Set dst (SubVS (Binary dst src2) mask));
24580   match(Set dst (SubVI (Binary dst src2) mask));
24581   match(Set dst (SubVL (Binary dst src2) mask));
24582   match(Set dst (SubVF (Binary dst src2) mask));
24583   match(Set dst (SubVD (Binary dst src2) mask));
24584   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24585   ins_encode %{
24586     int vlen_enc = vector_length_encoding(this);
24587     BasicType bt = Matcher::vector_element_basic_type(this);
24588     int opc = this->ideal_Opcode();
24589     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24590                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24591   %}
24592   ins_pipe( pipe_slow );
24593 %}
24594 
24595 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24596   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24597   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24598   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24599   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24600   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24601   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24602   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24603   ins_encode %{
24604     int vlen_enc = vector_length_encoding(this);
24605     BasicType bt = Matcher::vector_element_basic_type(this);
24606     int opc = this->ideal_Opcode();
24607     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24608                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24609   %}
24610   ins_pipe( pipe_slow );
24611 %}
24612 
24613 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24614   match(Set dst (MulVS (Binary dst src2) mask));
24615   match(Set dst (MulVI (Binary dst src2) mask));
24616   match(Set dst (MulVL (Binary dst src2) mask));
24617   match(Set dst (MulVF (Binary dst src2) mask));
24618   match(Set dst (MulVD (Binary dst src2) mask));
24619   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24620   ins_encode %{
24621     int vlen_enc = vector_length_encoding(this);
24622     BasicType bt = Matcher::vector_element_basic_type(this);
24623     int opc = this->ideal_Opcode();
24624     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24625                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24626   %}
24627   ins_pipe( pipe_slow );
24628 %}
24629 
24630 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24631   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24632   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24633   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24634   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24635   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24636   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24637   ins_encode %{
24638     int vlen_enc = vector_length_encoding(this);
24639     BasicType bt = Matcher::vector_element_basic_type(this);
24640     int opc = this->ideal_Opcode();
24641     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24642                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24643   %}
24644   ins_pipe( pipe_slow );
24645 %}
24646 
24647 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24648   match(Set dst (SqrtVF dst mask));
24649   match(Set dst (SqrtVD dst mask));
24650   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24651   ins_encode %{
24652     int vlen_enc = vector_length_encoding(this);
24653     BasicType bt = Matcher::vector_element_basic_type(this);
24654     int opc = this->ideal_Opcode();
24655     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24656                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24657   %}
24658   ins_pipe( pipe_slow );
24659 %}
24660 
24661 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24662   match(Set dst (DivVF (Binary dst src2) mask));
24663   match(Set dst (DivVD (Binary dst src2) mask));
24664   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24665   ins_encode %{
24666     int vlen_enc = vector_length_encoding(this);
24667     BasicType bt = Matcher::vector_element_basic_type(this);
24668     int opc = this->ideal_Opcode();
24669     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24670                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24671   %}
24672   ins_pipe( pipe_slow );
24673 %}
24674 
24675 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24676   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24677   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24678   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24679   ins_encode %{
24680     int vlen_enc = vector_length_encoding(this);
24681     BasicType bt = Matcher::vector_element_basic_type(this);
24682     int opc = this->ideal_Opcode();
24683     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24684                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24685   %}
24686   ins_pipe( pipe_slow );
24687 %}
24688 
24689 
24691   match(Set dst (RotateLeftV (Binary dst shift) mask));
24692   match(Set dst (RotateRightV (Binary dst shift) mask));
24693   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24694   ins_encode %{
24695     int vlen_enc = vector_length_encoding(this);
24696     BasicType bt = Matcher::vector_element_basic_type(this);
24697     int opc = this->ideal_Opcode();
24698     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24699                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24700   %}
24701   ins_pipe( pipe_slow );
24702 %}
24703 
24704 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24705   match(Set dst (RotateLeftV (Binary dst src2) mask));
24706   match(Set dst (RotateRightV (Binary dst src2) mask));
24707   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24708   ins_encode %{
24709     int vlen_enc = vector_length_encoding(this);
24710     BasicType bt = Matcher::vector_element_basic_type(this);
24711     int opc = this->ideal_Opcode();
24712     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24713                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24714   %}
24715   ins_pipe( pipe_slow );
24716 %}
24717 
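// Masked shifts come in two flavors, split by the is_var_shift() predicate: a uniform
// form, where every lane is shifted by the same count, and a variable form in the style
// of vpsllv/vpsrlv/vpsrav, where each lane carries its own count. The trailing boolean
// handed to evmasked_op selects between the two encodings.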
24718 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24719   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24720   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24721   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24722   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24723   ins_encode %{
24724     int vlen_enc = vector_length_encoding(this);
24725     BasicType bt = Matcher::vector_element_basic_type(this);
24726     int opc = this->ideal_Opcode();
24727     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24728                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24729   %}
24730   ins_pipe( pipe_slow );
24731 %}
24732 
24733 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24734   predicate(!n->as_ShiftV()->is_var_shift());
24735   match(Set dst (LShiftVS (Binary dst src2) mask));
24736   match(Set dst (LShiftVI (Binary dst src2) mask));
24737   match(Set dst (LShiftVL (Binary dst src2) mask));
24738   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24739   ins_encode %{
24740     int vlen_enc = vector_length_encoding(this);
24741     BasicType bt = Matcher::vector_element_basic_type(this);
24742     int opc = this->ideal_Opcode();
24743     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24744                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24745   %}
24746   ins_pipe( pipe_slow );
24747 %}
24748 
24749 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24750   predicate(n->as_ShiftV()->is_var_shift());
24751   match(Set dst (LShiftVS (Binary dst src2) mask));
24752   match(Set dst (LShiftVI (Binary dst src2) mask));
24753   match(Set dst (LShiftVL (Binary dst src2) mask));
24754   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24755   ins_encode %{
24756     int vlen_enc = vector_length_encoding(this);
24757     BasicType bt = Matcher::vector_element_basic_type(this);
24758     int opc = this->ideal_Opcode();
24759     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24760                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24761   %}
24762   ins_pipe( pipe_slow );
24763 %}
24764 
24765 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24766   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24767   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24768   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24769   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24770   ins_encode %{
24771     int vlen_enc = vector_length_encoding(this);
24772     BasicType bt = Matcher::vector_element_basic_type(this);
24773     int opc = this->ideal_Opcode();
24774     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24775                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24776   %}
24777   ins_pipe( pipe_slow );
24778 %}
24779 
24780 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24781   predicate(!n->as_ShiftV()->is_var_shift());
24782   match(Set dst (RShiftVS (Binary dst src2) mask));
24783   match(Set dst (RShiftVI (Binary dst src2) mask));
24784   match(Set dst (RShiftVL (Binary dst src2) mask));
24785   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24786   ins_encode %{
24787     int vlen_enc = vector_length_encoding(this);
24788     BasicType bt = Matcher::vector_element_basic_type(this);
24789     int opc = this->ideal_Opcode();
24790     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24791                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24792   %}
24793   ins_pipe( pipe_slow );
24794 %}
24795 
24796 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24797   predicate(n->as_ShiftV()->is_var_shift());
24798   match(Set dst (RShiftVS (Binary dst src2) mask));
24799   match(Set dst (RShiftVI (Binary dst src2) mask));
24800   match(Set dst (RShiftVL (Binary dst src2) mask));
24801   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24802   ins_encode %{
24803     int vlen_enc = vector_length_encoding(this);
24804     BasicType bt = Matcher::vector_element_basic_type(this);
24805     int opc = this->ideal_Opcode();
24806     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24807                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24808   %}
24809   ins_pipe( pipe_slow );
24810 %}
24811 
24812 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24813   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24814   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24815   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24816   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24817   ins_encode %{
24818     int vlen_enc = vector_length_encoding(this);
24819     BasicType bt = Matcher::vector_element_basic_type(this);
24820     int opc = this->ideal_Opcode();
24821     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24822                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24823   %}
24824   ins_pipe( pipe_slow );
24825 %}
24826 
24827 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24828   predicate(!n->as_ShiftV()->is_var_shift());
24829   match(Set dst (URShiftVS (Binary dst src2) mask));
24830   match(Set dst (URShiftVI (Binary dst src2) mask));
24831   match(Set dst (URShiftVL (Binary dst src2) mask));
24832   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24833   ins_encode %{
24834     int vlen_enc = vector_length_encoding(this);
24835     BasicType bt = Matcher::vector_element_basic_type(this);
24836     int opc = this->ideal_Opcode();
24837     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24838                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24839   %}
24840   ins_pipe( pipe_slow );
24841 %}
24842 
24843 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24844   predicate(n->as_ShiftV()->is_var_shift());
24845   match(Set dst (URShiftVS (Binary dst src2) mask));
24846   match(Set dst (URShiftVI (Binary dst src2) mask));
24847   match(Set dst (URShiftVL (Binary dst src2) mask));
24848   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24849   ins_encode %{
24850     int vlen_enc = vector_length_encoding(this);
24851     BasicType bt = Matcher::vector_element_basic_type(this);
24852     int opc = this->ideal_Opcode();
24853     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24854                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24855   %}
24856   ins_pipe( pipe_slow );
24857 %}
24858 
24859 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24860   match(Set dst (MaxV (Binary dst src2) mask));
24861   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24862   ins_encode %{
24863     int vlen_enc = vector_length_encoding(this);
24864     BasicType bt = Matcher::vector_element_basic_type(this);
24865     int opc = this->ideal_Opcode();
24866     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24867                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24868   %}
24869   ins_pipe( pipe_slow );
24870 %}
24871 
24872 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24873   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24874   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24875   ins_encode %{
24876     int vlen_enc = vector_length_encoding(this);
24877     BasicType bt = Matcher::vector_element_basic_type(this);
24878     int opc = this->ideal_Opcode();
24879     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24880                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24881   %}
24882   ins_pipe( pipe_slow );
24883 %}
24884 
24885 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24886   match(Set dst (MinV (Binary dst src2) mask));
24887   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24888   ins_encode %{
24889     int vlen_enc = vector_length_encoding(this);
24890     BasicType bt = Matcher::vector_element_basic_type(this);
24891     int opc = this->ideal_Opcode();
24892     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24893                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24894   %}
24895   ins_pipe( pipe_slow );
24896 %}
24897 
24898 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24899   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24900   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24901   ins_encode %{
24902     int vlen_enc = vector_length_encoding(this);
24903     BasicType bt = Matcher::vector_element_basic_type(this);
24904     int opc = this->ideal_Opcode();
24905     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24906                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24907   %}
24908   ins_pipe( pipe_slow );
24909 %}
24910 
24911 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24912   match(Set dst (VectorRearrange (Binary dst src2) mask));
24913   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24914   ins_encode %{
24915     int vlen_enc = vector_length_encoding(this);
24916     BasicType bt = Matcher::vector_element_basic_type(this);
24917     int opc = this->ideal_Opcode();
24918     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24919                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24920   %}
24921   ins_pipe( pipe_slow );
24922 %}
24923 
24924 instruct vabs_masked(vec dst, kReg mask) %{
24925   match(Set dst (AbsVB dst mask));
24926   match(Set dst (AbsVS dst mask));
24927   match(Set dst (AbsVI dst mask));
24928   match(Set dst (AbsVL dst mask));
24929   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24930   ins_encode %{
24931     int vlen_enc = vector_length_encoding(this);
24932     BasicType bt = Matcher::vector_element_basic_type(this);
24933     int opc = this->ideal_Opcode();
24934     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24935                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24936   %}
24937   ins_pipe( pipe_slow );
24938 %}
24939 
24940 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24941   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24942   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24943   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24944   ins_encode %{
24945     assert(UseFMA, "Needs FMA instructions support.");
24946     int vlen_enc = vector_length_encoding(this);
24947     BasicType bt = Matcher::vector_element_basic_type(this);
24948     int opc = this->ideal_Opcode();
24949     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24950                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24951   %}
24952   ins_pipe( pipe_slow );
24953 %}
24954 
24955 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24956   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24957   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24958   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24959   ins_encode %{
24960     assert(UseFMA, "Needs FMA instructions support.");
24961     int vlen_enc = vector_length_encoding(this);
24962     BasicType bt = Matcher::vector_element_basic_type(this);
24963     int opc = this->ideal_Opcode();
24964     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24965                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24966   %}
24967   ins_pipe( pipe_slow );
24968 %}
24969 
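// Masked vector comparison writes its result into a kReg destination under the given
// mask. Integral conditions are first classified as signed or unsigned and then mapped
// to the matching EVEX comparison predicate; floating-point conditions use the separate
// FP predicate encoding consumed by evcmpps/evcmppd.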
24970 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24971   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24972   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24973   ins_encode %{
24974     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24975     int vlen_enc = vector_length_encoding(this, $src1);
24976     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24977 
    // Translate the ideal comparison condition into an EVEX comparison predicate, per element type.
24979     switch (src1_elem_bt) {
24980       case T_BYTE: {
24981         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24982         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24983         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24984         break;
24985       }
24986       case T_SHORT: {
24987         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24988         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24989         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24990         break;
24991       }
24992       case T_INT: {
24993         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24994         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24995         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24996         break;
24997       }
24998       case T_LONG: {
24999         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25000         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25001         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25002         break;
25003       }
25004       case T_FLOAT: {
25005         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25006         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25007         break;
25008       }
25009       case T_DOUBLE: {
25010         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25011         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25012         break;
25013       }
      default: assert(false, "unsupported element type %s", type2name(src1_elem_bt)); break;
25015     }
25016   %}
25017   ins_pipe( pipe_slow );
25018 %}
25019 
25020 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25021   predicate(Matcher::vector_length(n) <= 32);
25022   match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src" %}
25024   ins_encode %{
25025     int mask_len = Matcher::vector_length(this);
25026     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25027   %}
25028   ins_pipe( pipe_slow );
25029 %}
25030 
25031 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25032   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25033   match(Set dst (XorVMask src (MaskAll cnt)));
25034   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
25036   ins_encode %{
25037     uint masklen = Matcher::vector_length(this);
25038     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25039   %}
25040   ins_pipe( pipe_slow );
25041 %}
25042 
25043 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25044   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25045             (Matcher::vector_length(n) == 16) ||
25046             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25047   match(Set dst (XorVMask src (MaskAll cnt)));
25048   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25049   ins_encode %{
25050     uint masklen = Matcher::vector_length(this);
25051     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25052   %}
25053   ins_pipe( pipe_slow );
25054 %}
25055 
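// VectorLongToMask materializes a vector mask from a long whose bits select the lanes.
// With true predicate registers (EVEX) this is a single kmov; the AVX fallbacks expand
// the bits into a boolean vector instead. For mask lengths of eight or fewer the masm
// helper is handed xnoreg in place of the vector temporary.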
25056 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
25057   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25058   match(Set dst (VectorLongToMask src));
25059   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
25060   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
25061   ins_encode %{
25062     int mask_len = Matcher::vector_length(this);
25063     int vec_enc  = vector_length_encoding(mask_len);
25064     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25065                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25066   %}
25067   ins_pipe( pipe_slow );
25068 %}
25069 
25071 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25072   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25073   match(Set dst (VectorLongToMask src));
25074   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
25076   ins_encode %{
25077     int mask_len = Matcher::vector_length(this);
25078     assert(mask_len <= 32, "invalid mask length");
25079     int vec_enc  = vector_length_encoding(mask_len);
25080     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25081                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25082   %}
25083   ins_pipe( pipe_slow );
25084 %}
25085 
25086 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25087   predicate(n->bottom_type()->isa_vectmask());
25088   match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src" %}
25090   ins_encode %{
25091     __ kmov($dst$$KRegister, $src$$Register);
25092   %}
25093   ins_pipe( pipe_slow );
25094 %}
25095 
25096 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25097   match(Set dst (AndVMask src1 src2));
25098   match(Set dst (OrVMask src1 src2));
25099   match(Set dst (XorVMask src1 src2));
25100   effect(TEMP kscratch);
25101   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25102   ins_encode %{
25103     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25104     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25105     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25106     uint masklen = Matcher::vector_length(this);
25107     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25108     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25109   %}
25110   ins_pipe( pipe_slow );
25111 %}
25112 
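// Masked ternary logic: the immU8 func operand is the 8-bit truth table of the three
// inputs (one result bit per combination of input bits), evaluated independently at
// every bit position by vpternlog.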
25113 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25114   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25115   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25116   ins_encode %{
25117     int vlen_enc = vector_length_encoding(this);
25118     BasicType bt = Matcher::vector_element_basic_type(this);
25119     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25120                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25121   %}
25122   ins_pipe( pipe_slow );
25123 %}
25124 
instruct vternlog_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25126   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25127   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25128   ins_encode %{
25129     int vlen_enc = vector_length_encoding(this);
25130     BasicType bt = Matcher::vector_element_basic_type(this);
25131     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25132                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25133   %}
25134   ins_pipe( pipe_slow );
25135 %}
25136 
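// CastVV nodes only adjust the type information seen by the optimizer; they emit no
// machine code, hence the empty encodings with zero size and cost below.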
25137 instruct castMM(kReg dst)
25138 %{
25139   match(Set dst (CastVV dst));
25140 
25141   size(0);
25142   format %{ "# castVV of $dst" %}
25143   ins_encode(/* empty encoding */);
25144   ins_cost(0);
25145   ins_pipe(empty);
25146 %}
25147 
25148 instruct castVV(vec dst)
25149 %{
25150   match(Set dst (CastVV dst));
25151 
25152   size(0);
25153   format %{ "# castVV of $dst" %}
25154   ins_encode(/* empty encoding */);
25155   ins_cost(0);
25156   ins_pipe(empty);
25157 %}
25158 
25159 instruct castVVLeg(legVec dst)
25160 %{
25161   match(Set dst (CastVV dst));
25162 
25163   size(0);
25164   format %{ "# castVV of $dst" %}
25165   ins_encode(/* empty encoding */);
25166   ins_cost(0);
25167   ins_pipe(empty);
25168 %}
25169 
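// IsInfinite is implemented with the EVEX vfpclass instructions: immediate 0x18 selects
// the +Inf (bit 3) and -Inf (bit 4) classes, and the resulting single-bit kReg is then
// copied into the integer destination.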
25170 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25171 %{
25172   match(Set dst (IsInfiniteF src));
25173   effect(TEMP ktmp, KILL cr);
25174   format %{ "float_class_check $dst, $src" %}
25175   ins_encode %{
25176     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25177     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25178   %}
25179   ins_pipe(pipe_slow);
25180 %}
25181 
25182 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25183 %{
25184   match(Set dst (IsInfiniteD src));
25185   effect(TEMP ktmp, KILL cr);
25186   format %{ "double_class_check $dst, $src" %}
25187   ins_encode %{
25188     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25189     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25190   %}
25191   ins_pipe(pipe_slow);
25192 %}
25193 
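// Saturating add/sub. Byte and short lanes map directly onto the packed saturating
// instructions (the vpadds/vpaddus/vpsubs/vpsubus families) via vector_saturating_op.
// Int and long lanes have no native saturating instructions, so the _dq_ masm helpers
// emulate them, which is why those rules need extra vector (and, on EVEX, mask)
// temporaries.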
25194 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25195 %{
25196   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25197             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25198   match(Set dst (SaturatingAddV src1 src2));
25199   match(Set dst (SaturatingSubV src1 src2));
25200   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25201   ins_encode %{
25202     int vlen_enc = vector_length_encoding(this);
25203     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25204     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25205                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25206   %}
25207   ins_pipe(pipe_slow);
25208 %}
25209 
25210 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25211 %{
25212   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25213             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25214   match(Set dst (SaturatingAddV src1 src2));
25215   match(Set dst (SaturatingSubV src1 src2));
25216   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25217   ins_encode %{
25218     int vlen_enc = vector_length_encoding(this);
25219     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25220     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25221                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25222   %}
25223   ins_pipe(pipe_slow);
25224 %}
25225 
25226 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25227 %{
25228   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25229             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25230             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25231   match(Set dst (SaturatingAddV src1 src2));
25232   match(Set dst (SaturatingSubV src1 src2));
25233   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25234   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25235   ins_encode %{
25236     int vlen_enc = vector_length_encoding(this);
25237     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25238     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25239                                         $src1$$XMMRegister, $src2$$XMMRegister,
25240                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25241                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25242   %}
25243   ins_pipe(pipe_slow);
25244 %}
25245 
25246 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25247 %{
25248   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25249             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25250             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25251   match(Set dst (SaturatingAddV src1 src2));
25252   match(Set dst (SaturatingSubV src1 src2));
25253   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25254   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25255   ins_encode %{
25256     int vlen_enc = vector_length_encoding(this);
25257     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25258     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25259                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25260                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25261   %}
25262   ins_pipe(pipe_slow);
25263 %}
25264 
25265 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25266 %{
25267   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25268             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25269             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25270   match(Set dst (SaturatingAddV src1 src2));
25271   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25272   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25273   ins_encode %{
25274     int vlen_enc = vector_length_encoding(this);
25275     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25276     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25277                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25278   %}
25279   ins_pipe(pipe_slow);
25280 %}
25281 
25282 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25283 %{
25284   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25285             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25286             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25287   match(Set dst (SaturatingAddV src1 src2));
25288   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25289   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25290   ins_encode %{
25291     int vlen_enc = vector_length_encoding(this);
25292     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25293     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25294                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25295   %}
25296   ins_pipe(pipe_slow);
25297 %}
25298 
25299 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25300 %{
25301   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25302             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25303             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25304   match(Set dst (SaturatingSubV src1 src2));
25305   effect(TEMP ktmp);
25306   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25307   ins_encode %{
25308     int vlen_enc = vector_length_encoding(this);
25309     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25310     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25311                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25312   %}
25313   ins_pipe(pipe_slow);
25314 %}
25315 
25316 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25317 %{
25318   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25319             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25320             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25321   match(Set dst (SaturatingSubV src1 src2));
25322   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25323   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25324   ins_encode %{
25325     int vlen_enc = vector_length_encoding(this);
25326     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25327     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25328                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25329   %}
25330   ins_pipe(pipe_slow);
25331 %}
25332 
25333 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25334 %{
25335   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25336             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25337   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25338   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25339   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25340   ins_encode %{
25341     int vlen_enc = vector_length_encoding(this);
25342     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25343     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25344                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25345   %}
25346   ins_pipe(pipe_slow);
25347 %}
25348 
25349 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25350 %{
25351   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25352             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25353   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25354   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25355   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25356   ins_encode %{
25357     int vlen_enc = vector_length_encoding(this);
25358     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25359     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25360                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25361   %}
25362   ins_pipe(pipe_slow);
25363 %}
25364 
25365 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25366   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25367             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25368   match(Set dst (SaturatingAddV (Binary dst src) mask));
25369   match(Set dst (SaturatingSubV (Binary dst src) mask));
25370   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25371   ins_encode %{
25372     int vlen_enc = vector_length_encoding(this);
25373     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25374     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25375                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25376   %}
25377   ins_pipe( pipe_slow );
25378 %}
25379 
25380 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25381   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25382             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25383   match(Set dst (SaturatingAddV (Binary dst src) mask));
25384   match(Set dst (SaturatingSubV (Binary dst src) mask));
25385   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25386   ins_encode %{
25387     int vlen_enc = vector_length_encoding(this);
25388     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25389     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25390                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25391   %}
25392   ins_pipe( pipe_slow );
25393 %}
25394 
25395 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25396   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25397             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25398   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25399   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25400   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25401   ins_encode %{
25402     int vlen_enc = vector_length_encoding(this);
25403     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25404     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25405                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25406   %}
25407   ins_pipe( pipe_slow );
25408 %}
25409 
25410 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25411   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25412             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25413   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25414   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25415   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25416   ins_encode %{
25417     int vlen_enc = vector_length_encoding(this);
25418     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25419     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25420                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25421   %}
25422   ins_pipe( pipe_slow );
25423 %}
25424 
25425 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25426 %{
25427   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25428   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25429   ins_encode %{
25430     int vlen_enc = vector_length_encoding(this);
25431     BasicType bt = Matcher::vector_element_basic_type(this);
25432     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25433   %}
25434   ins_pipe(pipe_slow);
25435 %}
25436 
25437 instruct reinterpretS2HF(regF dst, rRegI src)
25438 %{
25439   match(Set dst (ReinterpretS2HF src));
25440   format %{ "vmovw $dst, $src" %}
25441   ins_encode %{
25442     __ vmovw($dst$$XMMRegister, $src$$Register);
25443   %}
25444   ins_pipe(pipe_slow);
25445 %}
25446 
25447 instruct reinterpretHF2S(rRegI dst, regF src)
25448 %{
25449   match(Set dst (ReinterpretHF2S src));
25450   format %{ "vmovw $dst, $src" %}
25451   ins_encode %{
25452     __ vmovw($dst$$Register, $src$$XMMRegister);
25453   %}
25454   ins_pipe(pipe_slow);
25455 %}
25456 
25457 instruct convF2HFAndS2HF(regF dst, regF src)
25458 %{
25459   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25460   format %{ "convF2HFAndS2HF $dst, $src" %}
25461   ins_encode %{
25462     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25463   %}
25464   ins_pipe(pipe_slow);
25465 %}
25466 
25467 instruct convHF2SAndHF2F(regF dst, regF src)
25468 %{
25469   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25470   format %{ "convHF2SAndHF2F $dst, $src" %}
25471   ins_encode %{
25472     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25473   %}
25474   ins_pipe(pipe_slow);
25475 %}
25476 
25477 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25478 %{
25479   match(Set dst (SqrtHF src));
25480   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25481   ins_encode %{
25482     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25483   %}
25484   ins_pipe(pipe_slow);
25485 %}
25486 
25487 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25488 %{
25489   match(Set dst (AddHF src1 src2));
25490   match(Set dst (DivHF src1 src2));
25491   match(Set dst (MulHF src1 src2));
25492   match(Set dst (SubHF src1 src2));
25493   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25494   ins_encode %{
25495     int opcode = this->ideal_Opcode();
25496     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25497   %}
25498   ins_pipe(pipe_slow);
25499 %}
25500 
25501 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25502 %{
25503   predicate(VM_Version::supports_avx10_2());
25504   match(Set dst (MaxHF src1 src2));
25505   match(Set dst (MinHF src1 src2));
25506   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25507   ins_encode %{
25508     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25509     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25510   %}
25511   ins_pipe( pipe_slow );
25512 %}
25513 
25514 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25515 %{
25516   predicate(!VM_Version::supports_avx10_2());
25517   match(Set dst (MaxHF src1 src2));
25518   match(Set dst (MinHF src1 src2));
25519   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25520   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25521   ins_encode %{
25522     int opcode = this->ideal_Opcode();
25523     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25524                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25525   %}
25526   ins_pipe( pipe_slow );
25527 %}
25528 
25529 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25530 %{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 scalar fma" %}
25534   ins_encode %{
25535     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25536   %}
25537   ins_pipe( pipe_slow );
25538 %}
25539 
25541 instruct vector_sqrt_HF_reg(vec dst, vec src)
25542 %{
25543   match(Set dst (SqrtVHF src));
25544   format %{ "vector_sqrt_fp16 $dst, $src" %}
25545   ins_encode %{
25546     int vlen_enc = vector_length_encoding(this);
25547     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25548   %}
25549   ins_pipe(pipe_slow);
25550 %}
25551 
25552 instruct vector_sqrt_HF_mem(vec dst, memory src)
25553 %{
25554   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25555   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25556   ins_encode %{
25557     int vlen_enc = vector_length_encoding(this);
25558     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25559   %}
25560   ins_pipe(pipe_slow);
25561 %}
25562 
25563 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25564 %{
25565   match(Set dst (AddVHF src1 src2));
25566   match(Set dst (DivVHF src1 src2));
25567   match(Set dst (MulVHF src1 src2));
25568   match(Set dst (SubVHF src1 src2));
25569   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25570   ins_encode %{
25571     int vlen_enc = vector_length_encoding(this);
25572     int opcode = this->ideal_Opcode();
25573     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25574   %}
25575   ins_pipe(pipe_slow);
25576 %}
25577 
25579 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25580 %{
25581   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25582   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25583   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25584   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25585   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25586   ins_encode %{
25587     int vlen_enc = vector_length_encoding(this);
25588     int opcode = this->ideal_Opcode();
25589     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25590   %}
25591   ins_pipe(pipe_slow);
25592 %}
25593 
25594 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25595 %{
25596   match(Set dst (FmaVHF src2 (Binary dst src1)));
25597   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25598   ins_encode %{
25599     int vlen_enc = vector_length_encoding(this);
25600     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25601   %}
25602   ins_pipe( pipe_slow );
25603 %}
25604 
25605 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25606 %{
25607   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25608   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25609   ins_encode %{
25610     int vlen_enc = vector_length_encoding(this);
25611     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25612   %}
25613   ins_pipe( pipe_slow );
25614 %}
25615 
25616 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25617 %{
25618   predicate(VM_Version::supports_avx10_2());
25619   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25620   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25621   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25622   ins_encode %{
25623     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25625     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25626   %}
25627   ins_pipe( pipe_slow );
25628 %}
25629 
25630 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25631 %{
25632   predicate(VM_Version::supports_avx10_2());
25633   match(Set dst (MinVHF src1 src2));
25634   match(Set dst (MaxVHF src1 src2));
25635   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25636   ins_encode %{
25637     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25639     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25640   %}
25641   ins_pipe( pipe_slow );
25642 %}
25643 
25644 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25645 %{
25646   predicate(!VM_Version::supports_avx10_2());
25647   match(Set dst (MinVHF src1 src2));
25648   match(Set dst (MaxVHF src1 src2));
25649   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25650   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25651   ins_encode %{
25652     int vlen_enc = vector_length_encoding(this);
25653     int opcode = this->ideal_Opcode();
25654     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25655                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25656   %}
25657   ins_pipe( pipe_slow );
25658 %}
25659 
25660 //----------PEEPHOLE RULES-----------------------------------------------------
25661 // These must follow all instruction definitions as they use the names
25662 // defined in the instructions definitions.
25663 //
25664 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25666 //
25667 // peepmatch ( root_instr_name [preceding_instruction]* );
25668 //
// peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has the signature
// // bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...). The arguments
// // are the basic block, the current node index inside the block, the register
// // allocator, a function that upon invocation returns a new node as defined
// // in peepreplace, and the rule numbers of the nodes appearing in the
// // corresponding peepmatch. The procedure returns true if the transformation
// // succeeded, else false.
25678 //
25679 // peepconstraint %{
25680 // (instruction_number.operand_name relational_op instruction_number.operand_name
25681 //  [, ...] );
// // instruction numbers are zero-based, in left-to-right order within peepmatch
25683 //
25684 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25685 // // provide an instruction_number.operand_name for each operand that appears
25686 // // in the replacement instruction's match rule
25687 //
25688 // ---------VM FLAGS---------------------------------------------------------
25689 //
25690 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25691 //
25692 // Each peephole rule is given an identifying number starting with zero and
25693 // increasing by one in the order seen by the parser.  An individual peephole
25694 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25695 // on the command-line.
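//
// For example (illustrative command lines; the actual rule number depends on
// the order in which the parser sees the rules):
//   java -XX:-OptoPeephole ...        disable all peephole optimizations
//   java -XX:OptoPeepholeAt=3 ...     enable only peephole rule number 3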
25696 //
25697 // ---------CURRENT LIMITATIONS----------------------------------------------
25698 //
// Only transformations inside a basic block (do we need more for peephole?)
25700 //
25701 // ---------EXAMPLE----------------------------------------------------------
25702 //
25703 // // pertinent parts of existing instructions in architecture description
25704 // instruct movI(rRegI dst, rRegI src)
25705 // %{
25706 //   match(Set dst (CopyI src));
25707 // %}
25708 //
25709 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25710 // %{
25711 //   match(Set dst (AddI dst src));
25712 //   effect(KILL cr);
25713 // %}
25714 //
25715 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25716 // %{
25717 //   match(Set dst (AddI dst src));
25718 // %}
25719 //
25720 // 1. Simple replacement
25721 // - Only match adjacent instructions in same basic block
25722 // - Only equality constraints
25723 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25724 // - Only one replacement instruction
25725 //
25726 // // Change (inc mov) to lea
25727 // peephole %{
25728 //   // lea should only be emitted when beneficial
25729 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25730 //   // increment preceded by register-register move
25731 //   peepmatch ( incI_rReg movI );
25732 //   // require that the destination register of the increment
25733 //   // match the destination register of the move
25734 //   peepconstraint ( 0.dst == 1.dst );
25735 //   // construct a replacement instruction that sets
25736 //   // the destination to ( move's source register + one )
25737 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25738 // %}
25739 //
25740 // 2. Procedural replacement
// - More flexible matching of relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilize architecture-dependent APIs more effectively
// - Currently limited to one replacement instruction due to adlc parsing capabilities
25746 //
25747 // // Change (inc mov) to lea
25748 // peephole %{
25749 //   // lea should only be emitted when beneficial
25750 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the procedure below
25752 //   peepmatch ( incI_rReg movI );
25753 //   // the method that takes the responsibility of transformation
25754 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the procedure above
25757 //   peepreplace ( leaI_rReg_immI() );
25758 // %}
25759 
// These instructions are not matched by the matcher but are used by the peephole rules below
25761 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25762 %{
25763   predicate(false);
25764   match(Set dst (AddI src1 src2));
25765   format %{ "leal    $dst, [$src1 + $src2]" %}
25766   ins_encode %{
25767     Register dst = $dst$$Register;
25768     Register src1 = $src1$$Register;
25769     Register src2 = $src2$$Register;
25770     if (src1 != rbp && src1 != r13) {
25771       __ leal(dst, Address(src1, src2, Address::times_1));
25772     } else {
25773       assert(src2 != rbp && src2 != r13, "");
25774       __ leal(dst, Address(src2, src1, Address::times_1));
25775     }
25776   %}
25777   ins_pipe(ialu_reg_reg);
25778 %}
25779 
25780 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25781 %{
25782   predicate(false);
25783   match(Set dst (AddI src1 src2));
25784   format %{ "leal    $dst, [$src1 + $src2]" %}
25785   ins_encode %{
25786     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25787   %}
25788   ins_pipe(ialu_reg_reg);
25789 %}
25790 
25791 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25792 %{
25793   predicate(false);
25794   match(Set dst (LShiftI src shift));
25795   format %{ "leal    $dst, [$src << $shift]" %}
25796   ins_encode %{
25797     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25798     Register src = $src$$Register;
25799     if (scale == Address::times_2 && src != rbp && src != r13) {
25800       __ leal($dst$$Register, Address(src, src, Address::times_1));
25801     } else {
25802       __ leal($dst$$Register, Address(noreg, src, scale));
25803     }
25804   %}
25805   ins_pipe(ialu_reg_reg);
25806 %}
25807 
25808 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25809 %{
25810   predicate(false);
25811   match(Set dst (AddL src1 src2));
25812   format %{ "leaq    $dst, [$src1 + $src2]" %}
25813   ins_encode %{
25814     Register dst = $dst$$Register;
25815     Register src1 = $src1$$Register;
25816     Register src2 = $src2$$Register;
25817     if (src1 != rbp && src1 != r13) {
25818       __ leaq(dst, Address(src1, src2, Address::times_1));
25819     } else {
25820       assert(src2 != rbp && src2 != r13, "");
25821       __ leaq(dst, Address(src2, src1, Address::times_1));
25822     }
25823   %}
25824   ins_pipe(ialu_reg_reg);
25825 %}
25826 
25827 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25828 %{
25829   predicate(false);
25830   match(Set dst (AddL src1 src2));
25831   format %{ "leaq    $dst, [$src1 + $src2]" %}
25832   ins_encode %{
25833     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25834   %}
25835   ins_pipe(ialu_reg_reg);
25836 %}
25837 
25838 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25839 %{
25840   predicate(false);
25841   match(Set dst (LShiftL src shift));
25842   format %{ "leaq    $dst, [$src << $shift]" %}
25843   ins_encode %{
25844     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25845     Register src = $src$$Register;
25846     if (scale == Address::times_2 && src != rbp && src != r13) {
25847       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25848     } else {
25849       __ leaq($dst$$Register, Address(noreg, src, scale));
25850     }
25851   %}
25852   ins_pipe(ialu_reg_reg);
25853 %}
25854 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
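//
// For example (illustrative assembly; register choices are arbitrary):
//   movl  eax, ebx          ; movI
//   addl  eax, ecx          ; addI_rReg
// may be coalesced into the single instruction
//   leal  eax, [ebx + ecx]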
25861 
25862 peephole
25863 %{
25864   peeppredicate(VM_Version::supports_fast_2op_lea());
25865   peepmatch (addI_rReg);
25866   peepprocedure (lea_coalesce_reg);
25867   peepreplace (leaI_rReg_rReg_peep());
25868 %}
25869 
25870 peephole
25871 %{
25872   peeppredicate(VM_Version::supports_fast_2op_lea());
25873   peepmatch (addI_rReg_imm);
25874   peepprocedure (lea_coalesce_imm);
25875   peepreplace (leaI_rReg_immI_peep());
25876 %}
25877 
25878 peephole
25879 %{
25880   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25881                 VM_Version::is_intel_cascade_lake());
25882   peepmatch (incI_rReg);
25883   peepprocedure (lea_coalesce_imm);
25884   peepreplace (leaI_rReg_immI_peep());
25885 %}
25886 
25887 peephole
25888 %{
25889   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25890                 VM_Version::is_intel_cascade_lake());
25891   peepmatch (decI_rReg);
25892   peepprocedure (lea_coalesce_imm);
25893   peepreplace (leaI_rReg_immI_peep());
25894 %}
25895 
25896 peephole
25897 %{
25898   peeppredicate(VM_Version::supports_fast_2op_lea());
25899   peepmatch (salI_rReg_immI2);
25900   peepprocedure (lea_coalesce_imm);
25901   peepreplace (leaI_rReg_immI2_peep());
25902 %}
25903 
25904 peephole
25905 %{
25906   peeppredicate(VM_Version::supports_fast_2op_lea());
25907   peepmatch (addL_rReg);
25908   peepprocedure (lea_coalesce_reg);
25909   peepreplace (leaL_rReg_rReg_peep());
25910 %}
25911 
25912 peephole
25913 %{
25914   peeppredicate(VM_Version::supports_fast_2op_lea());
25915   peepmatch (addL_rReg_imm);
25916   peepprocedure (lea_coalesce_imm);
25917   peepreplace (leaL_rReg_immL32_peep());
25918 %}
25919 
25920 peephole
25921 %{
25922   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25923                 VM_Version::is_intel_cascade_lake());
25924   peepmatch (incL_rReg);
25925   peepprocedure (lea_coalesce_imm);
25926   peepreplace (leaL_rReg_immL32_peep());
25927 %}
25928 
25929 peephole
25930 %{
25931   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25932                 VM_Version::is_intel_cascade_lake());
25933   peepmatch (decL_rReg);
25934   peepprocedure (lea_coalesce_imm);
25935   peepreplace (leaL_rReg_immL32_peep());
25936 %}
25937 
25938 peephole
25939 %{
25940   peeppredicate(VM_Version::supports_fast_2op_lea());
25941   peepmatch (salL_rReg_immI2);
25942   peepprocedure (lea_coalesce_imm);
25943   peepreplace (leaL_rReg_immI2_peep());
25944 %}
25945 
25946 peephole
25947 %{
25948   peepmatch (leaPCompressedOopOffset);
25949   peepprocedure (lea_remove_redundant);
25950 %}
25951 
25952 peephole
25953 %{
25954   peepmatch (leaP8Narrow);
25955   peepprocedure (lea_remove_redundant);
25956 %}
25957 
25958 peephole
25959 %{
25960   peepmatch (leaP32Narrow);
25961   peepprocedure (lea_remove_redundant);
25962 %}
25963 
// These peephole rules match instructions which set flags and are followed by a
// testI/L_reg. The test instruction is redundant when the downstream
// instructions (like JCC or CMOV) only use flags that are already set by the
// previous instruction.
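//
// For example (illustrative assembly):
//   andl   eax, ebx        ; sets ZF/SF according to the result
//   testl  eax, eax        ; redundant, may be removed
//   je     done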
25966 
// int variant
25968 peephole
25969 %{
25970   peepmatch (testI_reg);
25971   peepprocedure (test_may_remove);
25972 %}
25973 
// long variant
25975 peephole
25976 %{
25977   peepmatch (testL_reg);
25978   peepprocedure (test_may_remove);
25979 %}
25980 
25982 //----------SMARTSPILL RULES---------------------------------------------------
25983 // These must follow all instruction definitions as they use the names
25984 // defined in the instructions definitions.