1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding, VMReg );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
      // Each general register is described by a pair of definitions: NAME is
      // bound to the register's own VMReg and NAME_H to the following VMReg
      // slot (->next()).  Both entries of a pair carry identical save types
      // and the same encoding.
      //
      // NOTE(review): in the definitions in this file the first (register save
      // type) column is never SOE for RBX, RSI or RDI; SOE only appears in the
      // C convention column.  The history note above may be out of date --
      // confirm.
      //
      // RAX, RCX, RDX: save-on-call in both columns.
   70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
   71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
   72 
   73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
   74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
   75 
   76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
   77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
   78 
      // RBX: save-on-call as register save type, save-on-entry in the
      // C convention column.
   79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
   80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
   81 
      // RSP: no-save in both columns.
   82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
   83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
   84 
   85 // Now that adapter frames are gone, RBP is always saved and restored by the
      // prolog/epilog code, hence no-save as register save type; it is
      // save-on-entry in the C convention column.
   86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
   87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
      // RSI and RDI: the C convention save type depends on the platform.
      // The first (register save type) column is SOC in both variants; only
      // the second column differs.
   89 #ifdef _WIN64
   90 
      // _WIN64: RSI and RDI are save-on-entry in the C convention column.
   91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
   92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
   93 
   94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
   95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
   96 
   97 #else
   98 
      // All other platforms: RSI and RDI are save-on-call in both columns.
   99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
  100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
  101 
  102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
  103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
  104 
  105 #endif
  106 
      // R8-R15 (encodings 8-15), again as NAME / NAME_H pairs on the
      // register's VMReg and its ->next() slot.
      //
      // R8-R11: save-on-call in both columns.
  107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
  108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
  109 
  110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
  111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
  112 
  113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
  114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  115 
  116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
  117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  118 
      // R12-R15: save-on-call as register save type, save-on-entry in the
      // C convention column.
  119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
  120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
  121 
  122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
  123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
  124 
  125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
  126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
  127 
  128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
  129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
      // R16-R31 (encodings 16-31): every pair is save-on-call in both the
      // register save type and the C convention column.
      // NOTE(review): presumably these are the extended general registers
      // added by Intel APX -- confirm against the assembler's register
      // definitions.
  131 reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
  132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
  133 
  134 reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
  135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
  136 
  137 reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
  138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
  139 
  140 reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
  141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
  142 
  143 reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
  144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
  145 
  146 reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
  147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
  148 
  149 reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
  150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
  151 
  152 reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
  153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
  154 
  155 reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
  156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
  157 
  158 reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
  159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
  160 
  161 reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
  162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
  163 
  164 reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
  165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
  166 
  167 reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
  168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
  169 
  170 reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
  171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
  172 
  173 reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
  174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
  175 
  176 reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
  177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // General Register Allocation Order
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
  188 
      // chunk0 lists every general register pair, each NAME immediately
      // followed by its NAME_H entry so that a pair always starts on an even
      // position.  Resulting order, highest priority first:
      //   R10, R11, R8, R9, R12, RCX, RBX, RDI, RDX, RSI, RAX, RBP,
      //   R13, R14, R15, R16 ... R31, and RSP last.
  189 alloc_class chunk0(R10,         R10_H,
  190                    R11,         R11_H,
  191                    R8,          R8_H,
  192                    R9,          R9_H,
  193                    R12,         R12_H,
  194                    RCX,         RCX_H,
  195                    RBX,         RBX_H,
  196                    RDI,         RDI_H,
  197                    RDX,         RDX_H,
  198                    RSI,         RSI_H,
  199                    RAX,         RAX_H,
  200                    RBP,         RBP_H,
  201                    R13,         R13_H,
  202                    R14,         R14_H,
  203                    R15,         R15_H,
  204                    R16,         R16_H,
  205                    R17,         R17_H,
  206                    R18,         R18_H,
  207                    R19,         R19_H,
  208                    R20,         R20_H,
  209                    R21,         R21_H,
  210                    R22,         R22_H,
  211                    R23,         R23_H,
  212                    R24,         R24_H,
  213                    R25,         R25_H,
  214                    R26,         R26_H,
  215                    R27,         R27_H,
  216                    R28,         R28_H,
  217                    R29,         R29_H,
  218                    R30,         R30_H,
  219                    R31,         R31_H,
  220                    RSP,         RSP_H);
  221 
  222 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  223 // Word a in each register holds a Float, words ab hold a Double.
  224 // The whole registers are used in SSE4.2 version intrinsics,
  225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  226 // UseXMMForArrayCopy and UseSuperword flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
  236 
      // XMM0-XMM7 (encodings 0-7).
      // Each register is described by 16 reg_def entries: the unsuffixed name
      // is bound to the register's own VMReg, and the suffixes b..p are bound
      // to the following slots next(1)..next(15).  All 16 entries of a
      // register share its encoding; every entry here is save-on-call in both
      // columns with ideal register type Op_RegF.
  237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  253 
  254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
  260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
  261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
  262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
  263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
  264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
  265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
  266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
  267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
  268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
  269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
  270 
  271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
  272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
  273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
  274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
  275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
  276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
  277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
  278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
  279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
  280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
  281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
  282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
  283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
  284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
  285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
  286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
  287 
  288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
  289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
  290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
  291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
  292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
  293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
  294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
  295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
  296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
  297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
  298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
  299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
  300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
  301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
  302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
  303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
  304 
  305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
  306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
  307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
  308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
  309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
  310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
  311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
  312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
  313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
  314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
  315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
  316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
  317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
  318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
  319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
  320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
  321 
  322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
  323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
  324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
  325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
  326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
  327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
  328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
  329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
  330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
  331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
  332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
  333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
  334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
  335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
  336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
  337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
  338 
  339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
  340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
  341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
  342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
  343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
  344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
  345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
  346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
  347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
  348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
  349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
  350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
  351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
  352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
  353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
  354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
  355 
  356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
  357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
  358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
  359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
  360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
  361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
  362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
  363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
  364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
  365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
  366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
  367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
  368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
  369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
  370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
  371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
      // XMM8-XMM15 (encodings 8-15).
      // Same layout as the lower XMM registers: 16 reg_def entries per
      // register, the unsuffixed name on the register's own VMReg and the
      // suffixes b..p on next(1)..next(15), all save-on-call in both columns
      // with ideal register type Op_RegF.
      // NOTE(review): the XMM header comment says the Windows ABI preserves
      // XMM6-XMM15 across calls, yet these entries are SOC in the C convention
      // column on every platform; presumably the preservation is handled
      // elsewhere -- confirm.
  373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
  374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
  375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
  376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
  377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
  378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
  379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
  380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
  381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
  382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
  383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
  384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
  385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
  386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
  387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
  388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
  389 
  390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
  391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
  392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
  393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
  394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
  395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
  396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
  397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
  398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
  399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
  400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
  401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
  402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
  403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
  404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
  405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
  406 
  407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
  408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
  409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
  410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
  411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
  412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
  413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
  414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
  415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
  416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
  417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
  418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
  419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
  420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
  421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
  422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
  423 
  424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
  425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
  426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
  427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
  428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
  429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
  430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
  431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
  432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
  433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
  434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
  435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
  436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
  437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
  438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
  439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
  440 
  441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
  442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
  443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
  444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
  445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
  446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
  447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
  448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
  449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
  450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
  451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
  452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
  453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
  454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
  455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
  456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
  457 
  458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
  459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
  460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
  461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
  462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
  463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
  464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
  465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
  466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
  467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
  468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
  469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
  470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
  471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
  472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
  473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
  474 
  475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
  476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
  477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
  478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
  479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
  480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
  481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
  482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
  483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
  484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
  485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
  486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
  487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
  488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
  489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
  490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
  491 
  492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
  493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
  494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
  495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
  496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
  497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
  498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
  499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
  500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
  501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
  502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
  503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
  504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
  505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
  506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
  507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
  509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
  510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
  511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
  512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
  513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
  514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
  515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
  516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
  517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
  518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
  519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
  520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
  521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
  522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
  523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
  524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
  525 
  526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); // XMM17 (encoding 17): base VMReg slot; XMM17b..XMM17p are the 15 slots that follow it
  527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
  528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
  529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
  530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
  531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
  532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
  533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
  534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
  535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
  536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
  537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
  538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
  539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
  540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
  541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); // last (16th) slot of XMM17
  542 
  543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); // XMM18 (encoding 18): base VMReg slot; XMM18b..XMM18p are the 15 slots that follow it
  544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
  545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
  546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
  547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
  548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
  549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
  550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
  551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
  552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
  553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
  554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
  555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
  556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
  557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
  558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); // last (16th) slot of XMM18
  559 
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); // XMM19 (encoding 19): base VMReg slot; XMM19b..XMM19p are the 15 slots that follow it
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); // last (16th) slot of XMM19
  576 
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); // XMM20 (encoding 20): base VMReg slot; XMM20b..XMM20p are the 15 slots that follow it
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); // last (16th) slot of XMM20
  593 
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); // XMM21 (encoding 21): base VMReg slot; XMM21b..XMM21p are the 15 slots that follow it
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); // last (16th) slot of XMM21
  610 
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); // XMM22 (encoding 22): base VMReg slot; XMM22b..XMM22p are the 15 slots that follow it
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); // last (16th) slot of XMM22
  627 
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); // XMM23 (encoding 23): base VMReg slot; XMM23b..XMM23p are the 15 slots that follow it
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); // last (16th) slot of XMM23
  644 
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); // XMM24 (encoding 24): base VMReg slot; XMM24b..XMM24p are the 15 slots that follow it
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); // last (16th) slot of XMM24
  661 
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); // XMM25 (encoding 25): base VMReg slot; XMM25b..XMM25p are the 15 slots that follow it
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); // last (16th) slot of XMM25
  678 
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); // XMM26 (encoding 26): base VMReg slot; XMM26b..XMM26p are the 15 slots that follow it
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); // last (16th) slot of XMM26
  695 
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); // XMM27 (encoding 27): base VMReg slot; XMM27b..XMM27p are the 15 slots that follow it
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); // last (16th) slot of XMM27
  712 
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); // XMM28 (encoding 28): base VMReg slot; XMM28b..XMM28p are the 15 slots that follow it
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); // last (16th) slot of XMM28
  729 
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); // XMM29 (encoding 29): base VMReg slot; XMM29b..XMM29p are the 15 slots that follow it
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); // last (16th) slot of XMM29
  746 
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); // XMM30 (encoding 30): base VMReg slot; XMM30b..XMM30p are the 15 slots that follow it
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); // last (16th) slot of XMM30
  763 
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); // XMM31 (encoding 31): base VMReg slot; XMM31b..XMM31p are the 15 slots that follow it
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); // last (16th) slot of XMM31
  780 
  781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); // flags register: ideal register type 0, encoding 16, and VMRegImpl::Bad() instead of a real VMReg
  782 
  783 // AVX3 Mask Registers K1-K7: each is defined as an Op_RegI pair (Kn plus Kn_H, the next VMReg slot).
  784 reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
  785 reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());
  786 
  787 reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
  788 reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());
  789 
  790 reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
  791 reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());
  792 
  793 reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
  794 reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());
  795 
  796 reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
  797 reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());
  798 
  799 reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
  800 reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());
  801 
  802 reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
  803 reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
  812 
  813 // Empty register class: declared with no member registers.
  814 reg_class no_reg();
  815 
  816 // Class for all pointer/long registers including APX extended GPRs (R16-R31); each register is listed with its _H companion, RSP and R15 included.
  817 reg_class all_reg(RAX, RAX_H,
  818                   RDX, RDX_H,
  819                   RBP, RBP_H,
  820                   RDI, RDI_H,
  821                   RSI, RSI_H,
  822                   RCX, RCX_H,
  823                   RBX, RBX_H,
  824                   RSP, RSP_H,
  825                   R8,  R8_H,
  826                   R9,  R9_H,
  827                   R10, R10_H,
  828                   R11, R11_H,
  829                   R12, R12_H,
  830                   R13, R13_H,
  831                   R14, R14_H,
  832                   R15, R15_H,
  833                   R16, R16_H,
  834                   R17, R17_H,
  835                   R18, R18_H,
  836                   R19, R19_H,
  837                   R20, R20_H,
  838                   R21, R21_H,
  839                   R22, R22_H,
  840                   R23, R23_H,
  841                   R24, R24_H,
  842                   R25, R25_H,
  843                   R26, R26_H,
  844                   R27, R27_H,
  845                   R28, R28_H,
  846                   R29, R29_H,
  847                   R30, R30_H,
  848                   R31, R31_H);
  849 
  850 // Class for all int registers including APX extended GPRs (R16-R31); lists only the base names (no _H), and RSP and R15 are not members.
  851 reg_class all_int_reg(RAX,
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14,
  865                       R16,
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
  881 
  882 // Class for all pointer registers; membership comes from _ANY_REG_mask rather than an explicit register list
  883 reg_class any_reg %{
  884   return _ANY_REG_mask;
  885 %}
  886 
  887 // Class for all pointer registers (excluding RSP); membership comes from _PTR_REG_mask
  888 reg_class ptr_reg %{
  889   return _PTR_REG_mask;
  890 %}
  891 
  892 // Class for all pointer registers (excluding RSP and RBP); membership comes from _PTR_REG_NO_RBP_mask
  893 reg_class ptr_reg_no_rbp %{
  894   return _PTR_REG_NO_RBP_mask;
  895 %}
  896 
  897 // Class for all pointer registers (excluding RAX and RSP); membership comes from _PTR_NO_RAX_REG_mask
  898 reg_class ptr_no_rax_reg %{
  899   return _PTR_NO_RAX_REG_mask;
  900 %}
  901 
  902 // Class for all pointer registers (excluding RAX, RBX, and RSP); membership comes from _PTR_NO_RAX_RBX_REG_mask
  903 reg_class ptr_no_rax_rbx_reg %{
  904   return _PTR_NO_RAX_RBX_REG_mask;
  905 %}
  906 
  907 // Class for all long registers (excluding RSP); membership comes from _LONG_REG_mask
  908 reg_class long_reg %{
  909   return _LONG_REG_mask;
  910 %}
  911 
  912 // Class for all long registers (excluding RAX, RDX and RSP); membership comes from _LONG_NO_RAX_RDX_REG_mask
  913 reg_class long_no_rax_rdx_reg %{
  914   return _LONG_NO_RAX_RDX_REG_mask;
  915 %}
  916 
  917 // Class for all long registers (excluding RCX and RSP); membership comes from _LONG_NO_RCX_REG_mask
  918 reg_class long_no_rcx_reg %{
  919   return _LONG_NO_RCX_REG_mask;
  920 %}
  921 
  922 // Class for all long registers (excluding RBP and R13); membership comes from _LONG_NO_RBP_R13_REG_mask
  923 reg_class long_no_rbp_r13_reg %{
  924   return _LONG_NO_RBP_R13_REG_mask;
  925 %}
  926 
  927 // Class for all int registers (excluding RSP); membership comes from _INT_REG_mask
  928 reg_class int_reg %{
  929   return _INT_REG_mask;
  930 %}
  931 
  932 // Class for all int registers (excluding RAX, RDX, and RSP); membership comes from _INT_NO_RAX_RDX_REG_mask
  933 reg_class int_no_rax_rdx_reg %{
  934   return _INT_NO_RAX_RDX_REG_mask;
  935 %}
  936 
  937 // Class for all int registers (excluding RCX and RSP); membership comes from _INT_NO_RCX_REG_mask
  938 reg_class int_no_rcx_reg %{
  939   return _INT_NO_RCX_REG_mask;
  940 %}
  941 
  942 // Class for all int registers (excluding RBP and R13); membership comes from _INT_NO_RBP_R13_REG_mask
  943 reg_class int_no_rbp_r13_reg %{
  944   return _INT_NO_RBP_R13_REG_mask;
  945 %}
  946 
  947 // Singleton class for RAX pointer register (the RAX, RAX_H pair)
  948 reg_class ptr_rax_reg(RAX, RAX_H);
  949 
  950 // Singleton class for RBX pointer register (the RBX, RBX_H pair)
  951 reg_class ptr_rbx_reg(RBX, RBX_H);
  952 
  953 // Singleton class for RSI pointer register (the RSI, RSI_H pair)
  954 reg_class ptr_rsi_reg(RSI, RSI_H);
  955 
  956 // Singleton class for RBP pointer register (the RBP, RBP_H pair)
  957 reg_class ptr_rbp_reg(RBP, RBP_H);
  958 
  959 // Singleton class for RDI pointer register (the RDI, RDI_H pair)
  960 reg_class ptr_rdi_reg(RDI, RDI_H);
  961 
  962 // Singleton class for stack pointer (the RSP, RSP_H pair)
  963 reg_class ptr_rsp_reg(RSP, RSP_H);
  964 
  965 // Singleton class for TLS pointer (the R15, R15_H pair)
  966 reg_class ptr_r15_reg(R15, R15_H);
  967 
  968 // Singleton class for RAX long register (the RAX, RAX_H pair)
  969 reg_class long_rax_reg(RAX, RAX_H);
  970 
  971 // Singleton class for RCX long register (the RCX, RCX_H pair)
  972 reg_class long_rcx_reg(RCX, RCX_H);
  973 
  974 // Singleton class for RDX long register (the RDX, RDX_H pair)
  975 reg_class long_rdx_reg(RDX, RDX_H);
  976 
  977 // Singleton class for R11 long register (the R11, R11_H pair)
  978 reg_class long_r11_reg(R11, R11_H);
  979 
  980 // Singleton class for RAX int register (RAX only, without RAX_H)
  981 reg_class int_rax_reg(RAX);
  982 
  983 // Singleton class for RBX int register (RBX only, without RBX_H)
  984 reg_class int_rbx_reg(RBX);
  985 
  986 // Singleton class for RCX int register (RCX only, without RCX_H)
  987 reg_class int_rcx_reg(RCX);
  988 
  989 // Singleton class for RDX int register (RDX only, without RDX_H)
  990 reg_class int_rdx_reg(RDX);
  991 
  992 // Singleton class for RDI int register (RDI only, without RDI_H)
  993 reg_class int_rdi_reg(RDI);
  994 
  995 // Singleton class for instruction pointer
  996 // reg_class ip_reg(RIP);
  997 
  998 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
  999                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1000                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1001                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1002                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1003                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1004                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1005                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1006                    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1007                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1008                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1009                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1010                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1011                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1012                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1013                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1014                    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1015                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1016                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1017                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1018                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1019                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1020                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1021                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1022                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1023                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1024                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1025                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1026                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); // chunk1: every slot (base through p) of XMM0-XMM31, one register per row in ascending order
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H); // chunk2: the mask registers with their _H slots, listed from K7 down to K1
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H); // class containing all seven mask registers K1-K7 (each with its _H slot)
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H); // vectmask_reg_K1..K7: one single-register class per mask register
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // The flags allocation class should be last; chunk3 contains only RFLAGS.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes (RFLAGS is its only member)
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers: only the base slot of XMM0-XMM15
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers: only the base slot of XMM0-XMM31
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); // picks between the two classes via the predicate; presumably float_reg_evex when it holds - confirm against ADLC
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); // same choice, but the predicate additionally requires supports_avx512vl()
 1115 
 1116 // Class for pre evex double registers: the first two slots (XMMn, XMMnb) of XMM0-XMM15
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers (XMM0 through XMM31, two names per register)
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
      // Dynamic double classes: double_reg_evex is selected when the predicate
      // holds, double_reg_legacy otherwise. double_reg only needs EVEX support;
      // double_reg_vl additionally requires AVX512VL.
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers (XMM0 through XMM15)
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers (XMM0 through XMM31)
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
      // Dynamic 32bit vector classes: vectors_reg_evex is selected when the
      // predicate holds, vectors_reg_legacy otherwise. vectors_reg keys on EVEX
      // support; vectors_reg_vlbwdq keys on supports_avx512vlbwdq().
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
 1226 // Class for pre evex 64bit vector registers (XMM0 through XMM15)
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
 1244 // Class for evex 64bit vector registers (XMM0 through XMM31)
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
      // Dynamic 64bit vector classes: vectord_reg_evex is selected when the
      // predicate holds, vectord_reg_legacy otherwise. vectord_reg keys on EVEX
      // support; vectord_reg_vlbwdq keys on supports_avx512vlbwdq().
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
 1281 // Class for pre evex 128bit vector registers (XMM0 through XMM15)
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
 1299 // Class for evex 128bit vector registers (XMM0 through XMM31)
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
      // Dynamic 128bit vector classes: vectorx_reg_evex is selected when the
      // predicate holds, vectorx_reg_legacy otherwise. vectorx_reg keys on EVEX
      // support; vectorx_reg_vlbwdq keys on supports_avx512vlbwdq().
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
 1336 // Class for pre evex 256bit vector registers (XMM0 through XMM15)
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
 1354 // Class for evex 256bit vector registers (XMM0 through XMM31)
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
      // Dynamic 256bit vector classes: vectory_reg_evex is selected when the
      // predicate holds, vectory_reg_legacy otherwise. vectory_reg keys on EVEX
      // support; vectory_reg_vlbwdq keys on supports_avx512vlbwdq().
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for evex 512bit vector registers (XMM0 through XMM31)
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers (limited to XMM0 through XMM15)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
      // Dynamic 512bit vector classes: vectorz_reg_evex is selected when the
      // predicate holds, vectorz_reg_legacy otherwise. vectorz_reg only needs
      // EVEX support; vectorz_reg_vl additionally requires AVX512VL.
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
      // Class containing only XMM0, listed with the same four names the 128bit
      // vector classes use for it.
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
      // Register masks that are filled in at runtime by reg_mask_init() rather
      // than being emitted as constants; they are defined in the source block.
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
      // Pointer/long/int register masks OR-ed with STACK_OR_STACK_SLOTS_mask()
      // by reg_mask_init().
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494
      // Read-only accessors for the three stack-or-register masks above.
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
      // Short aliases for Assembler::imm_operand and Assembler::disp32_operand.
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504
      // Emitter shorthand: "__ foo(...)" expands to "masm->foo(...)", so it
      // requires a variable named masm in the enclosing scope.
 1505 #define __ masm->
 1506 
      // Storage for the runtime-computed register masks; every one of these is
      // assigned in reg_mask_init().
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
      // True when compressed oops are in use. reg_mask_init() removes r12 from
      // the allocatable masks in that case (presumably because r12 then holds
      // the heap base, as the name suggests -- confirm against the macro assembler).
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
 1627   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
 1631   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 //
 1663 // Compute padding required for nodes which need alignment
 1664 //
 1665 
 1666 // The address of the call instruction needs to be 4-byte aligned to
 1667 // ensure that it does not span a cache line so that it can be patched.
 1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1669 {
 1670   current_offset += clear_avx_size(); // skip vzeroupper
 1671   current_offset += 1; // skip call opcode byte
 1672   return align_up(current_offset, alignment_required()) - current_offset;
 1673 }
 1674 
 1675 // The address of the call instruction needs to be 4-byte aligned to
 1676 // ensure that it does not span a cache line so that it can be patched.
 1677 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1678 {
 1679   current_offset += clear_avx_size(); // skip vzeroupper
 1680   current_offset += 11; // skip movq instruction + call opcode byte
 1681   return align_up(current_offset, alignment_required()) - current_offset;
 1682 }
 1683 
 1684 // This could be in MacroAssembler but it's fairly C2 specific
 1685 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1686   Label exit;
 1687   __ jccb(Assembler::noParity, exit);
 1688   __ pushf();
 1689   //
 1690   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1691   // zero OF,AF,SF for NaN values.
 1692   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1693   // values returns 'less than' result (CF is set).
 1694   // Leave the rest of flags unchanged.
 1695   //
 1696   //    7 6 5 4 3 2 1 0
 1697   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1698   //    0 0 1 0 1 0 1 1   (0x2B)
 1699   //
 1700   __ andq(Address(rsp, 0), 0xffffff2b);
 1701   __ popf();
 1702   __ bind(exit);
 1703 }
 1704 
 1705 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1706   // If any floating point comparison instruction is used, unordered case always triggers jump
 1707   // for below condition, CF=1 is true when at least one input is NaN
 1708   Label done;
 1709   __ movl(dst, -1);
 1710   __ jcc(Assembler::below, done);
 1711   __ setcc(Assembler::notEqual, dst);
 1712   __ bind(done);
 1713 }
 1714 
 1715 enum FP_PREC {
 1716   fp_prec_hlf,
 1717   fp_prec_flt,
 1718   fp_prec_dbl
 1719 };
 1720 
 1721 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1722                                 XMMRegister p, XMMRegister q) {
 1723   if (pt == fp_prec_hlf) {
 1724     __ evucomish(p, q);
 1725   } else if (pt == fp_prec_flt) {
 1726     __ ucomiss(p, q);
 1727   } else {
 1728     __ ucomisd(p, q);
 1729   }
 1730 }
 1731 
 1732 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1733                          XMMRegister dst, XMMRegister src, Register scratch) {
 1734   if (pt == fp_prec_hlf) {
 1735     __ movhlf(dst, src, scratch);
 1736   } else if (pt == fp_prec_flt) {
 1737     __ movflt(dst, src);
 1738   } else {
 1739     __ movdbl(dst, src);
 1740   }
 1741 }
 1742 
 1743 // Math.min()          # Math.max()
 1744 // -----------------------------
 1745 // (v)ucomis[h/s/d]    #
 1746 // ja   -> b           # a
 1747 // jp   -> NaN         # NaN
 1748 // jb   -> a           # b
 1749 // je   -> a | b       # a & b
// Emits the Math.min()/Math.max() sequence for inputs a and b.
//   dst - register receiving the result
//   a,b - the two inputs that are compared
//   rt  - general purpose scratch register; holds the NaN bit pattern and is
//         handed to movfp()
//   min - true selects the minimum, false the maximum
//   pt  - precision of the operands (half, single or double)
// NOTE(review): the label 'zero' is declared but never bound or jumped to.
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
                            XMMRegister a, XMMRegister b, Register rt,
                            bool min, enum FP_PREC pt) {
  Label nan, zero, below, above, done;

  emit_fp_ucom(masm, pt, a, b);

  // 'above' selects b for min and a for max. When dst already is that
  // register no move is required and the jump goes straight to done.
  if (dst->encoding() != (min ? b : a)->encoding()) {
    __ jccb(Assembler::above, above); // CF=0 & ZF=0
  } else {
    __ jccb(Assembler::above, done);
  }
  __ jccb(Assembler::parity, nan);  // PF=1
  __ jccb(Assembler::below, below); // CF=1

  // equal
  // Using bitwise operations is a low cost way to compute the correct result
  // for zero and non-zero inputs in this scenario except for NaN, which is
  // handled separately. The mantissa and exponent are valid with either
  // bitwise operation. For zero inputs, the sign bit is chosen according to
  // whether a minimum or maximum value is required.
  if (min) {
    // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
    __ vpor(dst, a, b, Assembler::AVX_128bit);
  } else {
    // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
    __ vpand(dst, a, b, Assembler::AVX_128bit);
  }
  __ jmp(done);

  // a compared above b: result is b for min, a for max.
  __ bind(above);
  movfp(masm, pt, dst, min ? b : a, rt);
  __ jmp(done);

  // Unordered compare: load the NaN constant of the matching precision
  // into dst by way of the scratch register.
  __ bind(nan);
  if (pt == fp_prec_hlf) {
    __ movl(rt, 0x00007e00); // Float16.NaN
    __ evmovw(dst, rt);
  } else if (pt == fp_prec_flt) {
    __ movl(rt, 0x7fc00000); // Float.NaN
    __ movdl(dst, rt);
  } else {
    __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
    __ movdq(dst, rt);
  }
  __ jmp(done);

  // a compared below b: result is a for min, b for max.
  __ bind(below);
  movfp(masm, pt, dst, min ? a : b, rt);

  __ bind(done);
}
 1802 
 1803 //=============================================================================
// The output register mask of MachConstantBaseNode refers to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1805 
 1806 int ConstantTable::calculate_table_base_offset() const {
 1807   return 0;  // absolute addressing, no offset
 1808 }
 1809 
 1810 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// Must not be called: requires_postalloc_expand() returns false for this
// node, so reaching this function is treated as a fatal error.
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
 1814 
// Emits nothing; the matching size() reports 0 bytes.
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
 1818 
 1819 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1820   return 0;
 1821 }
 1822 
 1823 #ifndef PRODUCT
// Debug listing (non-PRODUCT builds): prints a marker line only, since the
// node has an empty encoding.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
 1827 #endif
 1828 
 1829 
 1830 //=============================================================================
 1831 #ifndef PRODUCT
 1832 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1833   Compile* C = ra_->C;
 1834 
 1835   int framesize = C->output()->frame_size_in_bytes();
 1836   int bangsize = C->output()->bang_size_in_bytes();
 1837   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1838   // Remove wordSize for return addr which is already pushed.
 1839   framesize -= wordSize;
 1840 
 1841   if (C->output()->need_stack_bang(bangsize)) {
 1842     framesize -= wordSize;
 1843     st->print("# stack bang (%d bytes)", bangsize);
 1844     st->print("\n\t");
 1845     st->print("pushq   rbp\t# Save rbp");
 1846     if (PreserveFramePointer) {
 1847         st->print("\n\t");
 1848         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1849     }
 1850     if (framesize) {
 1851       st->print("\n\t");
 1852       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1853     }
 1854   } else {
 1855     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1856     st->print("\n\t");
 1857     framesize -= wordSize;
 1858     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1859     if (PreserveFramePointer) {
 1860       st->print("\n\t");
 1861       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1862       if (framesize > 0) {
 1863         st->print("\n\t");
 1864         st->print("addq    rbp, #%d", framesize);
 1865       }
 1866     }
 1867   }
 1868 
 1869   if (VerifyStackAtCalls) {
 1870     st->print("\n\t");
 1871     framesize -= wordSize;
 1872     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1873 #ifdef ASSERT
 1874     st->print("\n\t");
 1875     st->print("# stack alignment check");
 1876 #endif
 1877   }
 1878   if (C->stub_function() != nullptr) {
 1879     st->print("\n\t");
 1880     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1881     st->print("\n\t");
 1882     st->print("je      fast_entry\t");
 1883     st->print("\n\t");
 1884     st->print("call    #nmethod_entry_barrier_stub\t");
 1885     st->print("\n\tfast_entry:");
 1886   }
 1887   st->cr();
 1888 }
 1889 #endif
 1890 
// Emits the method prolog: the verified entry sequence, the entry barrier
// for non-stub compilations, and the bookkeeping that follows it.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // Compilations of a stub function get no entry barrier.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // The _verified_entry label is bound only when the output phase is not in
  // its scratch-emit-size mode.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  // Record the current code offset as the frame-complete offset.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1913 
 1914 
// NOTE(review): the trailing comment calls the value "a large enough number"
// although it is 0; presumably the prolog needs no relocation entries -
// confirm against the callers of reloc().
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
 1919 
 1920 //=============================================================================
 1921 #ifndef PRODUCT
// Debug listing (non-PRODUCT builds) of the method epilog: optional
// vzeroupper, frame removal, rbp restore and, for method compilations that
// poll, the return safepoint poll.
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  Compile* C = ra_->C;
  if (generate_vzeroupper(C)) {
    st->print("vzeroupper");
    st->cr(); st->print("\t");
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Nothing to print when the frame holds only the return address and rbp.
  if (framesize) {
    st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
    st->print("\t");
  }

  st->print_cr("popq    rbp");
  // Same condition as the poll generated in MachEpilogNode::emit.
  if (do_polling() && C->is_method_compilation()) {
    st->print("\t");
    st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
                 "ja      #safepoint_stub\t"
                 "# Safepoint: poll for GC");
  }
}
 1949 #endif
 1950 
// Emits the method epilog: optional vzeroupper, frame removal, optional
// reserved stack check and, for method compilations that poll, the return
// safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  // Only when reserved stack pages are configured and this compilation has
  // reserved stack access.
  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // In scratch-emit-size mode a local dummy label stands in for the stub
    // entry. Otherwise a C2SafepointPollStub is allocated in the compile
    // arena for the current code offset, registered with the output phase,
    // and its entry becomes the poll target.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    // Mark the poll with a poll_return relocation.
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
 1981 
// Presumably an upper bound on the relocation entries the epilog may need
// (emit() issues one explicit poll_return relocation) - confirm against the
// callers of reloc().
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
 1986 
// The epilog uses the pipeline class that MachNode::pipeline_class() returns.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
 1991 
 1992 //=============================================================================
 1993 
 1994 enum RC {
 1995   rc_bad,
 1996   rc_int,
 1997   rc_kreg,
 1998   rc_float,
 1999   rc_stack
 2000 };
 2001 
 2002 static enum RC rc_class(OptoReg::Name reg)
 2003 {
 2004   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 2005 
 2006   if (OptoReg::is_stack(reg)) return rc_stack;
 2007 
 2008   VMReg r = OptoReg::as_VMReg(reg);
 2009 
 2010   if (r->is_Register()) return rc_int;
 2011 
 2012   if (r->is_KRegister()) return rc_kreg;
 2013 
 2014   assert(r->is_XMMRegister(), "must be");
 2015   return rc_float;
 2016 }
 2017 
 2018 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Vector register-to-register move. MachSpillCopyNode::implementation passes
// the first/second OptoReg names of source and destination as the lo/hi
// arguments and the ideal register kind as ireg.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector move between register reg and the stack slot at stack_offset.
// MachSpillCopyNode::implementation passes is_load == true for
// stack -> register and is_load == false for register -> stack.
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 2024 
// Copies a vector value from the stack slot at [rsp + src_offset] to the
// stack slot at [rsp + dst_offset]; ireg selects the vector size
// (Op_VecS/D/X/Y/Z). With a non-null masm the instructions are emitted;
// otherwise, in non-PRODUCT builds, the equivalent listing is printed to st.
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS:
      // 32-bit copy through rax; rax is parked at [rsp - 8] and restored.
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD:
      // 64-bit copy with a push/pop pair, no scratch register needed.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX:
      // 128-bit copy as two 64-bit push/pop pairs.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY:
      // 256-bit copy through xmm0; xmm0 is parked at [rsp - 32] and restored.
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      // 512-bit copy through xmm0; xmm0 is parked at [rsp - 64] and restored.
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Listing only: mirrors the sequences emitted above.
    switch (ireg) {
    case Op_VecS:
      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl    rax, [rsp + #%d]\n\t"
                "movl    [rsp + #%d], rax\n\t"
                "movq    rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset);
      break;
     case Op_VecX:
      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushq   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): the listing says vmovdqu while the emitted
      // instruction for this case is evmovdquq.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
 2102 
 2103 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2104                                        PhaseRegAlloc* ra_,
 2105                                        bool do_size,
 2106                                        outputStream* st) const {
 2107   assert(masm != nullptr || st  != nullptr, "sanity");
 2108   // Get registers to move
 2109   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2110   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2111   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2112   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2113 
 2114   enum RC src_second_rc = rc_class(src_second);
 2115   enum RC src_first_rc = rc_class(src_first);
 2116   enum RC dst_second_rc = rc_class(dst_second);
 2117   enum RC dst_first_rc = rc_class(dst_first);
 2118 
 2119   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2120          "must move at least 1 register" );
 2121 
 2122   if (src_first == dst_first && src_second == dst_second) {
 2123     // Self copy, no move
 2124     return 0;
 2125   }
 2126   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
 2127     uint ireg = ideal_reg();
 2128     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2129     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 2130     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 2131       // mem -> mem
 2132       int src_offset = ra_->reg2offset(src_first);
 2133       int dst_offset = ra_->reg2offset(dst_first);
 2134       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 2135     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 2136       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 2137     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 2138       int stack_offset = ra_->reg2offset(dst_first);
 2139       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2140     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 2141       int stack_offset = ra_->reg2offset(src_first);
 2142       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2143     } else {
 2144       ShouldNotReachHere();
 2145     }
 2146     return 0;
 2147   }
 2148   if (src_first_rc == rc_stack) {
 2149     // mem ->
 2150     if (dst_first_rc == rc_stack) {
 2151       // mem -> mem
 2152       assert(src_second != dst_first, "overlap");
 2153       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2154           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2155         // 64-bit
 2156         int src_offset = ra_->reg2offset(src_first);
 2157         int dst_offset = ra_->reg2offset(dst_first);
 2158         if (masm) {
 2159           __ pushq(Address(rsp, src_offset));
 2160           __ popq (Address(rsp, dst_offset));
 2161 #ifndef PRODUCT
 2162         } else {
 2163           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2164                     "popq    [rsp + #%d]",
 2165                      src_offset, dst_offset);
 2166 #endif
 2167         }
 2168       } else {
 2169         // 32-bit
 2170         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2171         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2172         // No pushl/popl, so:
 2173         int src_offset = ra_->reg2offset(src_first);
 2174         int dst_offset = ra_->reg2offset(dst_first);
 2175         if (masm) {
 2176           __ movq(Address(rsp, -8), rax);
 2177           __ movl(rax, Address(rsp, src_offset));
 2178           __ movl(Address(rsp, dst_offset), rax);
 2179           __ movq(rax, Address(rsp, -8));
 2180 #ifndef PRODUCT
 2181         } else {
 2182           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2183                     "movl    rax, [rsp + #%d]\n\t"
 2184                     "movl    [rsp + #%d], rax\n\t"
 2185                     "movq    rax, [rsp - #8]",
 2186                      src_offset, dst_offset);
 2187 #endif
 2188         }
 2189       }
 2190       return 0;
 2191     } else if (dst_first_rc == rc_int) {
 2192       // mem -> gpr
 2193       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2194           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2195         // 64-bit
 2196         int offset = ra_->reg2offset(src_first);
 2197         if (masm) {
 2198           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2199 #ifndef PRODUCT
 2200         } else {
 2201           st->print("movq    %s, [rsp + #%d]\t# spill",
 2202                      Matcher::regName[dst_first],
 2203                      offset);
 2204 #endif
 2205         }
 2206       } else {
 2207         // 32-bit
 2208         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2209         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2210         int offset = ra_->reg2offset(src_first);
 2211         if (masm) {
 2212           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2213 #ifndef PRODUCT
 2214         } else {
 2215           st->print("movl    %s, [rsp + #%d]\t# spill",
 2216                      Matcher::regName[dst_first],
 2217                      offset);
 2218 #endif
 2219         }
 2220       }
 2221       return 0;
 2222     } else if (dst_first_rc == rc_float) {
 2223       // mem-> xmm
 2224       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2225           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2226         // 64-bit
 2227         int offset = ra_->reg2offset(src_first);
 2228         if (masm) {
 2229           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2230 #ifndef PRODUCT
 2231         } else {
 2232           st->print("%s  %s, [rsp + #%d]\t# spill",
 2233                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2234                      Matcher::regName[dst_first],
 2235                      offset);
 2236 #endif
 2237         }
 2238       } else {
 2239         // 32-bit
 2240         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2241         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2242         int offset = ra_->reg2offset(src_first);
 2243         if (masm) {
 2244           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2245 #ifndef PRODUCT
 2246         } else {
 2247           st->print("movss   %s, [rsp + #%d]\t# spill",
 2248                      Matcher::regName[dst_first],
 2249                      offset);
 2250 #endif
 2251         }
 2252       }
 2253       return 0;
 2254     } else if (dst_first_rc == rc_kreg) {
 2255       // mem -> kreg
 2256       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2257           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2258         // 64-bit
 2259         int offset = ra_->reg2offset(src_first);
 2260         if (masm) {
 2261           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2262 #ifndef PRODUCT
 2263         } else {
 2264           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2265                      Matcher::regName[dst_first],
 2266                      offset);
 2267 #endif
 2268         }
 2269       }
 2270       return 0;
 2271     }
 2272   } else if (src_first_rc == rc_int) {
 2273     // gpr ->
 2274     if (dst_first_rc == rc_stack) {
 2275       // gpr -> mem
 2276       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2277           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2278         // 64-bit
 2279         int offset = ra_->reg2offset(dst_first);
 2280         if (masm) {
 2281           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2282 #ifndef PRODUCT
 2283         } else {
 2284           st->print("movq    [rsp + #%d], %s\t# spill",
 2285                      offset,
 2286                      Matcher::regName[src_first]);
 2287 #endif
 2288         }
 2289       } else {
 2290         // 32-bit
 2291         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2292         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2293         int offset = ra_->reg2offset(dst_first);
 2294         if (masm) {
 2295           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2296 #ifndef PRODUCT
 2297         } else {
 2298           st->print("movl    [rsp + #%d], %s\t# spill",
 2299                      offset,
 2300                      Matcher::regName[src_first]);
 2301 #endif
 2302         }
 2303       }
 2304       return 0;
 2305     } else if (dst_first_rc == rc_int) {
 2306       // gpr -> gpr
 2307       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2308           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2309         // 64-bit
 2310         if (masm) {
 2311           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2312                   as_Register(Matcher::_regEncode[src_first]));
 2313 #ifndef PRODUCT
 2314         } else {
 2315           st->print("movq    %s, %s\t# spill",
 2316                      Matcher::regName[dst_first],
 2317                      Matcher::regName[src_first]);
 2318 #endif
 2319         }
 2320         return 0;
 2321       } else {
 2322         // 32-bit
 2323         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2324         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2325         if (masm) {
 2326           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2327                   as_Register(Matcher::_regEncode[src_first]));
 2328 #ifndef PRODUCT
 2329         } else {
 2330           st->print("movl    %s, %s\t# spill",
 2331                      Matcher::regName[dst_first],
 2332                      Matcher::regName[src_first]);
 2333 #endif
 2334         }
 2335         return 0;
 2336       }
 2337     } else if (dst_first_rc == rc_float) {
 2338       // gpr -> xmm
 2339       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2340           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2341         // 64-bit
 2342         if (masm) {
 2343           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2344 #ifndef PRODUCT
 2345         } else {
 2346           st->print("movdq   %s, %s\t# spill",
 2347                      Matcher::regName[dst_first],
 2348                      Matcher::regName[src_first]);
 2349 #endif
 2350         }
 2351       } else {
 2352         // 32-bit
 2353         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2354         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2355         if (masm) {
 2356           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2357 #ifndef PRODUCT
 2358         } else {
 2359           st->print("movdl   %s, %s\t# spill",
 2360                      Matcher::regName[dst_first],
 2361                      Matcher::regName[src_first]);
 2362 #endif
 2363         }
 2364       }
 2365       return 0;
 2366     } else if (dst_first_rc == rc_kreg) {
 2367       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2368           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2369         // 64-bit
 2370         if (masm) {
 2371           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2372   #ifndef PRODUCT
 2373         } else {
 2374            st->print("kmovq   %s, %s\t# spill",
 2375                        Matcher::regName[dst_first],
 2376                        Matcher::regName[src_first]);
 2377   #endif
 2378         }
 2379       }
 2380       Unimplemented();
 2381       return 0;
 2382     }
 2383   } else if (src_first_rc == rc_float) {
 2384     // xmm ->
 2385     if (dst_first_rc == rc_stack) {
 2386       // xmm -> mem
 2387       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2388           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2389         // 64-bit
 2390         int offset = ra_->reg2offset(dst_first);
 2391         if (masm) {
 2392           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2393 #ifndef PRODUCT
 2394         } else {
 2395           st->print("movsd   [rsp + #%d], %s\t# spill",
 2396                      offset,
 2397                      Matcher::regName[src_first]);
 2398 #endif
 2399         }
 2400       } else {
 2401         // 32-bit
 2402         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2403         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2404         int offset = ra_->reg2offset(dst_first);
 2405         if (masm) {
 2406           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2407 #ifndef PRODUCT
 2408         } else {
 2409           st->print("movss   [rsp + #%d], %s\t# spill",
 2410                      offset,
 2411                      Matcher::regName[src_first]);
 2412 #endif
 2413         }
 2414       }
 2415       return 0;
 2416     } else if (dst_first_rc == rc_int) {
 2417       // xmm -> gpr
 2418       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2419           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2420         // 64-bit
 2421         if (masm) {
 2422           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2423 #ifndef PRODUCT
 2424         } else {
 2425           st->print("movdq   %s, %s\t# spill",
 2426                      Matcher::regName[dst_first],
 2427                      Matcher::regName[src_first]);
 2428 #endif
 2429         }
 2430       } else {
 2431         // 32-bit
 2432         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2433         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2434         if (masm) {
 2435           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2436 #ifndef PRODUCT
 2437         } else {
 2438           st->print("movdl   %s, %s\t# spill",
 2439                      Matcher::regName[dst_first],
 2440                      Matcher::regName[src_first]);
 2441 #endif
 2442         }
 2443       }
 2444       return 0;
 2445     } else if (dst_first_rc == rc_float) {
 2446       // xmm -> xmm
 2447       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2448           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2449         // 64-bit
 2450         if (masm) {
 2451           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2452 #ifndef PRODUCT
 2453         } else {
 2454           st->print("%s  %s, %s\t# spill",
 2455                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2456                      Matcher::regName[dst_first],
 2457                      Matcher::regName[src_first]);
 2458 #endif
 2459         }
 2460       } else {
 2461         // 32-bit
 2462         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2463         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2464         if (masm) {
 2465           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2466 #ifndef PRODUCT
 2467         } else {
 2468           st->print("%s  %s, %s\t# spill",
 2469                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2470                      Matcher::regName[dst_first],
 2471                      Matcher::regName[src_first]);
 2472 #endif
 2473         }
 2474       }
 2475       return 0;
 2476     } else if (dst_first_rc == rc_kreg) {
 2477       assert(false, "Illegal spilling");
 2478       return 0;
 2479     }
 2480   } else if (src_first_rc == rc_kreg) {
 2481     if (dst_first_rc == rc_stack) {
 2482       // mem -> kreg
 2483       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2484           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2485         // 64-bit
 2486         int offset = ra_->reg2offset(dst_first);
 2487         if (masm) {
 2488           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2489 #ifndef PRODUCT
 2490         } else {
 2491           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2492                      offset,
 2493                      Matcher::regName[src_first]);
 2494 #endif
 2495         }
 2496       }
 2497       return 0;
 2498     } else if (dst_first_rc == rc_int) {
 2499       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2500           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2501         // 64-bit
 2502         if (masm) {
 2503           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2504 #ifndef PRODUCT
 2505         } else {
 2506          st->print("kmovq   %s, %s\t# spill",
 2507                      Matcher::regName[dst_first],
 2508                      Matcher::regName[src_first]);
 2509 #endif
 2510         }
 2511       }
 2512       Unimplemented();
 2513       return 0;
 2514     } else if (dst_first_rc == rc_kreg) {
 2515       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2516           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2517         // 64-bit
 2518         if (masm) {
 2519           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2520 #ifndef PRODUCT
 2521         } else {
 2522          st->print("kmovq   %s, %s\t# spill",
 2523                      Matcher::regName[dst_first],
 2524                      Matcher::regName[src_first]);
 2525 #endif
 2526         }
 2527       }
 2528       return 0;
 2529     } else if (dst_first_rc == rc_float) {
 2530       assert(false, "Illegal spill");
 2531       return 0;
 2532     }
 2533   }
 2534 
 2535   assert(0," foo ");
 2536   Unimplemented();
 2537   return 0;
 2538 }
 2539 
 2540 #ifndef PRODUCT
 2541 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2542   implementation(nullptr, ra_, false, st);
 2543 }
 2544 #endif
 2545 
 2546 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2547   implementation(masm, ra_, false, nullptr);
 2548 }
 2549 
 2550 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2551   return MachNode::size(ra_);
 2552 }
 2553 
 2554 //=============================================================================
 2555 #ifndef PRODUCT
 2556 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2557 {
 2558   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2559   int reg = ra_->get_reg_first(this);
 2560   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2561             Matcher::regName[reg], offset);
 2562 }
 2563 #endif
 2564 
 2565 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2566 {
 2567   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2568   int reg = ra_->get_encode(this);
 2569 
 2570   __ lea(as_Register(reg), Address(rsp, offset));
 2571 }
 2572 
 2573 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2574 {
 2575   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2576   if (ra_->get_encode(this) > 15) {
 2577     return (offset < 0x80) ? 6 : 9; // REX2
 2578   } else {
 2579     return (offset < 0x80) ? 5 : 8; // REX
 2580   }
 2581 }
 2582 
 2583 //=============================================================================
 2584 #ifndef PRODUCT
 2585 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2586 {
 2587   st->print_cr("MachVEPNode");
 2588 }
 2589 #endif
 2590 
 2591 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2592 {
 2593   CodeBuffer* cbuf = masm->code();
 2594   if (!_verified) {
 2595     __ ic_check(1);
 2596   } else {
 2597     if (ra_->C->stub_function() == nullptr) {
 2598       // Emit the entry barrier in a temporary frame before unpacking because
 2599       // it can deopt, which would require packing the scalarized args again.
 2600       __ verified_entry(ra_->C, 0);
 2601       __ entry_barrier();
 2602       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2603       __ remove_frame(initial_framesize, false);
 2604     }
 2605     // Unpack inline type args passed as oop and then jump to
 2606     // the verified entry point (skipping the unverified entry).
 2607     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2608     // Emit code for verified entry and save increment for stack repair on return
 2609     __ verified_entry(ra_->C, sp_inc);
 2610     if (Compile::current()->output()->in_scratch_emit_size()) {
 2611       Label dummy_verified_entry;
 2612       __ jmp(dummy_verified_entry);
 2613     } else {
 2614       __ jmp(*_verified_entry);
 2615     }
 2616   }
 2617   if (ra_->C->stub_function() == nullptr) {
 2618     // Pad so that the next call to MachVEPNode::emit() starts out with the
 2619     // correct alignment.  This is needed by entry_barrier() to align the
 2620     // compare.  But unfortunately we need to align all 4 MachVEPNodes because
 2621     // entry point offsets are computed using scratch_emit_size(), so starting
 2622     // alignment must match the alignment of the scratch buffer, otherwise the sizes
 2623     // will be off.
 2624     __ align(4);
 2625   }
 2626 }
 2627 
 2628 //=============================================================================
 2629 #ifndef PRODUCT
 2630 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2631 {
 2632   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2633   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2634   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2635 }
 2636 #endif
 2637 
 2638 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2639 {
 2640   __ ic_check(InteriorEntryAlignment);
 2641 }
 2642 
 2643 
 2644 //=============================================================================
 2645 
 2646 bool Matcher::supports_vector_calling_convention(void) {
 2647   return EnableVectorSupport;
 2648 }
 2649 
 2650 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2651   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2652 }
 2653 
 2654 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2655   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2656 }
 2657 
 2658 #ifdef ASSERT
 2659 static bool is_ndd_demotable(const MachNode* mdef) {
 2660   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2661 }
 2662 #endif
 2663 
 2664 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2665                                             int oper_index) {
 2666   if (mdef == nullptr) {
 2667     return false;
 2668   }
 2669 
 2670   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2671       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2672     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2673     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2674     return false;
 2675   }
 2676 
 2677   // Complex memory operand covers multiple incoming edges needed for
 2678   // address computation. Biasing def towards any address component will not
 2679   // result in NDD demotion by assembler.
 2680   if (mdef->operand_num_edges(oper_index) != 1) {
 2681     return false;
 2682   }
 2683 
 2684   // Demotion candidate must be register mask compatible with definition.
 2685   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2686   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2687     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2688     return false;
 2689   }
 2690 
 2691   switch (oper_index) {
 2692   // First operand of MachNode corresponding to Intel APX NDD selection
 2693   // pattern can share its assigned register with definition operand if
 2694   // their live ranges do not overlap. In such a scenario we can demote
 2695   // it to legacy map0/map1 instruction by replacing its 4-byte extended
 2696   // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
 2697   // are decorated with a special flag by instruction selector.
 2698   case 1:
 2699     return is_ndd_demotable_opr1(mdef);
 2700 
 2701   // Definition operand of commutative operation can be biased towards second
 2702   // operand.
 2703   case 2:
 2704     return is_ndd_demotable_opr2(mdef);
 2705 
 2706   // Current scheme only selects up to two biasing candidates
 2707   default:
 2708     assert(false, "unhandled operand index: %s", mdef->Name());
 2709     break;
 2710   }
 2711 
 2712   return false;
 2713 }
 2714 
 2715 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2716   assert(EnableVectorSupport, "sanity");
 2717   int lo = XMM0_num;
 2718   int hi = XMM0b_num;
 2719   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2720   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2721   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2722   return OptoRegPair(hi, lo);
 2723 }
 2724 
 2725 // Is this branch offset short enough that a short branch can be used?
 2726 //
 2727 // NOTE: If the platform does not provide any short branch variants, then
 2728 //       this method should return false for offset 0.
 2729 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2730   // The passed offset is relative to address of the branch.
 2731   // On 86 a branch displacement is calculated relative to address
 2732   // of a next instruction.
 2733   offset -= br_size;
 2734 
 2735   // the short version of jmpConUCF2 contains multiple branches,
 2736   // making the reach slightly less
 2737   if (rule == jmpConUCF2_rule)
 2738     return (-126 <= offset && offset <= 125);
 2739   return (-128 <= offset && offset <= 127);
 2740 }
 2741 
 2742 #ifdef ASSERT
 2743 // Return whether or not this register is ever used as an argument.
 2744 bool Matcher::can_be_java_arg(int reg)
 2745 {
 2746   return
 2747     reg ==  RDI_num || reg == RDI_H_num ||
 2748     reg ==  RSI_num || reg == RSI_H_num ||
 2749     reg ==  RDX_num || reg == RDX_H_num ||
 2750     reg ==  RCX_num || reg == RCX_H_num ||
 2751     reg ==   R8_num || reg ==  R8_H_num ||
 2752     reg ==   R9_num || reg ==  R9_H_num ||
 2753     reg ==  R12_num || reg == R12_H_num ||
 2754     reg == XMM0_num || reg == XMM0b_num ||
 2755     reg == XMM1_num || reg == XMM1b_num ||
 2756     reg == XMM2_num || reg == XMM2b_num ||
 2757     reg == XMM3_num || reg == XMM3b_num ||
 2758     reg == XMM4_num || reg == XMM4b_num ||
 2759     reg == XMM5_num || reg == XMM5b_num ||
 2760     reg == XMM6_num || reg == XMM6b_num ||
 2761     reg == XMM7_num || reg == XMM7b_num;
 2762 }
 2763 #endif
 2764 
 2765 uint Matcher::int_pressure_limit()
 2766 {
 2767   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2768 }
 2769 
 2770 uint Matcher::float_pressure_limit()
 2771 {
 2772   // After experiment around with different values, the following default threshold
 2773   // works best for LCM's register pressure scheduling on x64.
 2774   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2775   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2776   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2777 }
 2778 
 2779 // Register for the first projection of an int pair
 2780 const RegMask& Matcher::firstI_proj_mask() {
 2781   return INT_RAX_REG_mask();
 2782 }
 2783 
 2784 // Register for the second projection of an int pair
 2785 const RegMask& Matcher::secondI_proj_mask() {
 2786   return INT_RDX_REG_mask();
 2787 }
 2788 
 2789 // Register for the first projection of a long pair
 2790 const RegMask& Matcher::firstL_proj_mask() {
 2791   return LONG_RAX_REG_mask();
 2792 }
 2793 
 2794 // Register for the second projection of a long pair
 2795 const RegMask& Matcher::secondL_proj_mask() {
 2796   return LONG_RDX_REG_mask();
 2797 }
 2798 
 2799 %}
 2800 
 2801 source_hpp %{
 2802 // Header information of the source block.
 2803 // Method declarations/definitions which are used outside
 2804 // the ad-scope can conveniently be defined here.
 2805 //
// To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
 2808 
 2809 #include "runtime/vm_version.hpp"
 2810 
 2811 class NativeJump;
 2812 
 2813 class CallStubImpl {
 2814 
 2815   //--------------------------------------------------------------
 2816   //---<  Used for optimization in Compile::shorten_branches  >---
 2817   //--------------------------------------------------------------
 2818 
 2819  public:
 2820   // Size of call trampoline stub.
 2821   static uint size_call_trampoline() {
 2822     return 0; // no call trampolines on this platform
 2823   }
 2824 
 2825   // number of relocations needed by a call trampoline stub
 2826   static uint reloc_call_trampoline() {
 2827     return 0; // no call trampolines on this platform
 2828   }
 2829 };
 2830 
 2831 class HandlerImpl {
 2832 
 2833  public:
 2834 
 2835   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2836 
 2837   static uint size_deopt_handler() {
 2838     // one call and one jmp.
 2839     return 7;
 2840   }
 2841 };
 2842 
 2843 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2844   switch(bytes) {
 2845     case  4: // fall-through
 2846     case  8: // fall-through
 2847     case 16: return Assembler::AVX_128bit;
 2848     case 32: return Assembler::AVX_256bit;
 2849     case 64: return Assembler::AVX_512bit;
 2850 
 2851     default: {
 2852       ShouldNotReachHere();
 2853       return Assembler::AVX_NoVec;
 2854     }
 2855   }
 2856 }
 2857 
 2858 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2859   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2860 }
 2861 
 2862 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2863   uint def_idx = use->operand_index(opnd);
 2864   Node* def = use->in(def_idx);
 2865   return vector_length_encoding(def);
 2866 }
 2867 
 2868 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2869   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2870          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2871 }
 2872 
 2873 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2874   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2875            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2876 }
 2877 
 2878 class Node::PD {
 2879 public:
 2880   enum NodeFlags : uint64_t {
 2881     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2882     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2883     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2884     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2885     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2886     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2887     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2888     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2889     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2890     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2891     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2892     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2893     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2894     _last_flag                = Flag_ndd_demotable_opr2
 2895   };
 2896 };
 2897 
 2898 %} // end source_hpp
 2899 
 2900 source %{
 2901 
 2902 #include "opto/addnode.hpp"
 2903 #include "c2_intelJccErratum_x86.hpp"
 2904 
 2905 void PhaseOutput::pd_perform_mach_node_analysis() {
 2906   if (VM_Version::has_intel_jcc_erratum()) {
 2907     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2908     _buf_sizes._code += extra_padding;
 2909   }
 2910 }
 2911 
 2912 int MachNode::pd_alignment_required() const {
 2913   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2914     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2915     return IntelJccErratum::largest_jcc_size() + 1;
 2916   } else {
 2917     return 1;
 2918   }
 2919 }
 2920 
 2921 int MachNode::compute_padding(int current_offset) const {
 2922   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2923     Compile* C = Compile::current();
 2924     PhaseOutput* output = C->output();
 2925     Block* block = output->block();
 2926     int index = output->index();
 2927     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2928   } else {
 2929     return 0;
 2930   }
 2931 }
 2932 
 2933 // Emit deopt handler code.
 2934 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2935 
 2936   // Note that the code buffer's insts_mark is always relative to insts.
 2937   // That's why we must use the macroassembler to generate a handler.
 2938   address base = __ start_a_stub(size_deopt_handler());
 2939   if (base == nullptr) {
 2940     ciEnv::current()->record_failure("CodeCache is full");
 2941     return 0;  // CodeBuffer::expand failed
 2942   }
 2943   int offset = __ offset();
 2944 
 2945   Label start;
 2946   __ bind(start);
 2947 
 2948   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2949 
 2950   int entry_offset = __ offset();
 2951 
 2952   __ jmp(start);
 2953 
 2954   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2955   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2956          "out of bounds read in post-call NOP check");
 2957   __ end_a_stub();
 2958   return entry_offset;
 2959 }
 2960 
 2961 static Assembler::Width widthForType(BasicType bt) {
 2962   if (bt == T_BYTE) {
 2963     return Assembler::B;
 2964   } else if (bt == T_SHORT) {
 2965     return Assembler::W;
 2966   } else if (bt == T_INT) {
 2967     return Assembler::D;
 2968   } else {
 2969     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2970     return Assembler::Q;
 2971   }
 2972 }
 2973 
 2974 //=============================================================================
 2975 
 2976   // Float masks come from different places depending on platform.
 2977   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2978   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2979   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2980   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2981   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2982   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2983   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2984   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2985   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2986   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2987   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2988   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2989   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2990   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2991   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2992   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2993   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2994   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2995   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2996 
 2997 //=============================================================================
 2998 bool Matcher::match_rule_supported(int opcode) {
 2999   if (!has_match_rule(opcode)) {
 3000     return false; // no match rule present
 3001   }
 3002   switch (opcode) {
 3003     case Op_AbsVL:
 3004     case Op_StoreVectorScatter:
 3005       if (UseAVX < 3) {
 3006         return false;
 3007       }
 3008       break;
 3009     case Op_PopCountI:
 3010     case Op_PopCountL:
 3011       if (!UsePopCountInstruction) {
 3012         return false;
 3013       }
 3014       break;
 3015     case Op_PopCountVI:
 3016       if (UseAVX < 2) {
 3017         return false;
 3018       }
 3019       break;
 3020     case Op_CompressV:
 3021     case Op_ExpandV:
 3022     case Op_PopCountVL:
 3023       if (UseAVX < 2) {
 3024         return false;
 3025       }
 3026       break;
 3027     case Op_MulVI:
 3028       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3029         return false;
 3030       }
 3031       break;
 3032     case Op_MulVL:
 3033       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3034         return false;
 3035       }
 3036       break;
 3037     case Op_MulReductionVL:
 3038       if (VM_Version::supports_avx512dq() == false) {
 3039         return false;
 3040       }
 3041       break;
 3042     case Op_AbsVB:
 3043     case Op_AbsVS:
 3044     case Op_AbsVI:
 3045     case Op_AddReductionVI:
 3046     case Op_AndReductionV:
 3047     case Op_OrReductionV:
 3048     case Op_XorReductionV:
 3049       if (UseSSE < 3) { // requires at least SSSE3
 3050         return false;
 3051       }
 3052       break;
 3053     case Op_MaxHF:
 3054     case Op_MinHF:
 3055       if (!VM_Version::supports_avx512vlbw()) {
 3056         return false;
 3057       }  // fallthrough
 3058     case Op_AddHF:
 3059     case Op_DivHF:
 3060     case Op_FmaHF:
 3061     case Op_MulHF:
 3062     case Op_ReinterpretS2HF:
 3063     case Op_ReinterpretHF2S:
 3064     case Op_SubHF:
 3065     case Op_SqrtHF:
 3066       if (!VM_Version::supports_avx512_fp16()) {
 3067         return false;
 3068       }
 3069       break;
 3070     case Op_VectorLoadShuffle:
 3071     case Op_VectorRearrange:
 3072     case Op_MulReductionVI:
 3073       if (UseSSE < 4) { // requires at least SSE4
 3074         return false;
 3075       }
 3076       break;
 3077     case Op_IsInfiniteF:
 3078     case Op_IsInfiniteD:
 3079       if (!VM_Version::supports_avx512dq()) {
 3080         return false;
 3081       }
 3082       break;
 3083     case Op_SqrtVD:
 3084     case Op_SqrtVF:
 3085     case Op_VectorMaskCmp:
 3086     case Op_VectorCastB2X:
 3087     case Op_VectorCastS2X:
 3088     case Op_VectorCastI2X:
 3089     case Op_VectorCastL2X:
 3090     case Op_VectorCastF2X:
 3091     case Op_VectorCastD2X:
 3092     case Op_VectorUCastB2X:
 3093     case Op_VectorUCastS2X:
 3094     case Op_VectorUCastI2X:
 3095     case Op_VectorMaskCast:
 3096       if (UseAVX < 1) { // enabled for AVX only
 3097         return false;
 3098       }
 3099       break;
 3100     case Op_PopulateIndex:
 3101       if (UseAVX < 2) {
 3102         return false;
 3103       }
 3104       break;
 3105     case Op_RoundVF:
 3106       if (UseAVX < 2) { // enabled for AVX2 only
 3107         return false;
 3108       }
 3109       break;
 3110     case Op_RoundVD:
 3111       if (UseAVX < 3) {
 3112         return false;  // enabled for AVX3 only
 3113       }
 3114       break;
 3115     case Op_CompareAndSwapL:
 3116     case Op_CompareAndSwapP:
 3117       break;
 3118     case Op_StrIndexOf:
 3119       if (!UseSSE42Intrinsics) {
 3120         return false;
 3121       }
 3122       break;
 3123     case Op_StrIndexOfChar:
 3124       if (!UseSSE42Intrinsics) {
 3125         return false;
 3126       }
 3127       break;
 3128     case Op_OnSpinWait:
 3129       if (VM_Version::supports_on_spin_wait() == false) {
 3130         return false;
 3131       }
 3132       break;
 3133     case Op_MulVB:
 3134     case Op_LShiftVB:
 3135     case Op_RShiftVB:
 3136     case Op_URShiftVB:
 3137     case Op_VectorInsert:
 3138     case Op_VectorLoadMask:
 3139     case Op_VectorStoreMask:
 3140     case Op_VectorBlend:
 3141       if (UseSSE < 4) {
 3142         return false;
 3143       }
 3144       break;
 3145     case Op_MaxD:
 3146     case Op_MaxF:
 3147     case Op_MinD:
 3148     case Op_MinF:
 3149       if (UseAVX < 1) { // enabled for AVX only
 3150         return false;
 3151       }
 3152       break;
 3153     case Op_CacheWB:
 3154     case Op_CacheWBPreSync:
 3155     case Op_CacheWBPostSync:
 3156       if (!VM_Version::supports_data_cache_line_flush()) {
 3157         return false;
 3158       }
 3159       break;
 3160     case Op_ExtractB:
 3161     case Op_ExtractL:
 3162     case Op_ExtractI:
 3163     case Op_RoundDoubleMode:
 3164       if (UseSSE < 4) {
 3165         return false;
 3166       }
 3167       break;
 3168     case Op_RoundDoubleModeV:
 3169       if (VM_Version::supports_avx() == false) {
 3170         return false; // 128bit vroundpd is not available
 3171       }
 3172       break;
 3173     case Op_LoadVectorGather:
 3174     case Op_LoadVectorGatherMasked:
 3175       if (UseAVX < 2) {
 3176         return false;
 3177       }
 3178       break;
 3179     case Op_FmaF:
 3180     case Op_FmaD:
 3181     case Op_FmaVD:
 3182     case Op_FmaVF:
 3183       if (!UseFMA) {
 3184         return false;
 3185       }
 3186       break;
 3187     case Op_MacroLogicV:
 3188       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3189         return false;
 3190       }
 3191       break;
 3192 
 3193     case Op_VectorCmpMasked:
 3194       if (!UseCountTrailingZerosInstruction) {
 3195         return false;
 3196       }
 3197       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3198         return false;
 3199       }
 3200       break;
 3201     case Op_VectorMaskGen:
 3202       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3203         return false;
 3204       }
 3205       break;
 3206     case Op_VectorMaskFirstTrue:
 3207     case Op_VectorMaskLastTrue:
 3208     case Op_VectorMaskTrueCount:
 3209     case Op_VectorMaskToLong:
 3210       if (UseAVX < 1) {
 3211          return false;
 3212       }
 3213       break;
 3214     case Op_RoundF:
 3215     case Op_RoundD:
 3216       break;
 3217     case Op_CopySignD:
 3218     case Op_CopySignF:
 3219       if (UseAVX < 3)  {
 3220         return false;
 3221       }
 3222       if (!VM_Version::supports_avx512vl()) {
 3223         return false;
 3224       }
 3225       break;
 3226     case Op_CompressBits:
 3227     case Op_ExpandBits:
 3228       if (!VM_Version::supports_bmi2()) {
 3229         return false;
 3230       }
 3231       break;
 3232     case Op_CompressM:
 3233       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3234         return false;
 3235       }
 3236       break;
 3237     case Op_ConvF2HF:
 3238     case Op_ConvHF2F:
 3239       if (!VM_Version::supports_float16()) {
 3240         return false;
 3241       }
 3242       break;
 3243     case Op_VectorCastF2HF:
 3244     case Op_VectorCastHF2F:
 3245       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3246         return false;
 3247       }
 3248       break;
 3249   }
 3250   return true;  // Match rules are supported by default.
 3251 }
 3252 
 3253 //------------------------------------------------------------------------
 3254 
 3255 static inline bool is_pop_count_instr_target(BasicType bt) {
 3256   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3257          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3258 }
 3259 
 3260 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3261   return match_rule_supported_vector(opcode, vlen, bt);
 3262 }
 3263 
 3264 // Identify extra cases that we might want to provide match rules for vector nodes and
 3265 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3266 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3267   if (!match_rule_supported(opcode)) {
 3268     return false;
 3269   }
 3270   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3271   //   * SSE2 supports 128bit vectors for all types;
 3272   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3273   //   * AVX2 supports 256bit vectors for all types;
 3274   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3275   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3276   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3277   // And MaxVectorSize is taken into account as well.
 3278   if (!vector_size_supported(bt, vlen)) {
 3279     return false;
 3280   }
 3281   // Special cases which require vector length follow:
 3282   //   * implementation limitations
 3283   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3284   //   * 128bit vroundpd instruction is present only in AVX1
 3285   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3286   switch (opcode) {
 3287     case Op_MaxVHF:
 3288     case Op_MinVHF:
 3289       if (!VM_Version::supports_avx512bw()) {
 3290         return false;
 3291       }
 3292     case Op_AddVHF:
 3293     case Op_DivVHF:
 3294     case Op_FmaVHF:
 3295     case Op_MulVHF:
 3296     case Op_SubVHF:
 3297     case Op_SqrtVHF:
 3298       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3299         return false;
 3300       }
 3301       if (!VM_Version::supports_avx512_fp16()) {
 3302         return false;
 3303       }
 3304       break;
 3305     case Op_AbsVF:
 3306     case Op_NegVF:
 3307       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3308         return false; // 512bit vandps and vxorps are not available
 3309       }
 3310       break;
 3311     case Op_AbsVD:
 3312     case Op_NegVD:
 3313       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3314         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3315       }
 3316       break;
 3317     case Op_RotateRightV:
 3318     case Op_RotateLeftV:
 3319       if (bt != T_INT && bt != T_LONG) {
 3320         return false;
 3321       } // fallthrough
 3322     case Op_MacroLogicV:
 3323       if (!VM_Version::supports_evex() ||
 3324           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3325         return false;
 3326       }
 3327       break;
 3328     case Op_ClearArray:
 3329     case Op_VectorMaskGen:
 3330     case Op_VectorCmpMasked:
 3331       if (!VM_Version::supports_avx512bw()) {
 3332         return false;
 3333       }
 3334       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3335         return false;
 3336       }
 3337       break;
 3338     case Op_LoadVectorMasked:
 3339     case Op_StoreVectorMasked:
 3340       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3341         return false;
 3342       }
 3343       break;
 3344     case Op_UMinV:
 3345     case Op_UMaxV:
 3346       if (UseAVX == 0) {
 3347         return false;
 3348       }
 3349       break;
 3350     case Op_UMinReductionV:
 3351     case Op_UMaxReductionV:
 3352       if (UseAVX == 0) {
 3353         return false;
 3354       }
 3355       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3356         return false;
 3357       }
 3358       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3359         return false;
 3360       }
 3361       break;
 3362     case Op_MaxV:
 3363     case Op_MinV:
 3364       if (UseSSE < 4 && is_integral_type(bt)) {
 3365         return false;
 3366       }
 3367       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3368           // Float/Double intrinsics are enabled for AVX family currently.
 3369           if (UseAVX == 0) {
 3370             return false;
 3371           }
 3372           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3373             return false;
 3374           }
 3375       }
 3376       break;
 3377     case Op_CallLeafVector:
 3378       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3379         return false;
 3380       }
 3381       break;
 3382     case Op_AddReductionVI:
 3383       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3384         return false;
 3385       }
 3386       // fallthrough
 3387     case Op_AndReductionV:
 3388     case Op_OrReductionV:
 3389     case Op_XorReductionV:
 3390       if (is_subword_type(bt) && (UseSSE < 4)) {
 3391         return false;
 3392       }
 3393       break;
 3394     case Op_MinReductionV:
 3395     case Op_MaxReductionV:
 3396       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3397         return false;
 3398       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3399         return false;
 3400       }
 3401       // Float/Double intrinsics enabled for AVX family.
 3402       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3403         return false;
 3404       }
 3405       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3406         return false;
 3407       }
 3408       break;
 3409     case Op_VectorBlend:
 3410       if (UseAVX == 0 && size_in_bits < 128) {
 3411         return false;
 3412       }
 3413       break;
 3414     case Op_VectorTest:
 3415       if (UseSSE < 4) {
 3416         return false; // Implementation limitation
 3417       } else if (size_in_bits < 32) {
 3418         return false; // Implementation limitation
 3419       }
 3420       break;
 3421     case Op_VectorLoadShuffle:
 3422     case Op_VectorRearrange:
 3423       if(vlen == 2) {
 3424         return false; // Implementation limitation due to how shuffle is loaded
 3425       } else if (size_in_bits == 256 && UseAVX < 2) {
 3426         return false; // Implementation limitation
 3427       }
 3428       break;
 3429     case Op_VectorLoadMask:
 3430     case Op_VectorMaskCast:
 3431       if (size_in_bits == 256 && UseAVX < 2) {
 3432         return false; // Implementation limitation
 3433       }
 3434       // fallthrough
 3435     case Op_VectorStoreMask:
 3436       if (vlen == 2) {
 3437         return false; // Implementation limitation
 3438       }
 3439       break;
 3440     case Op_PopulateIndex:
 3441       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3442         return false;
 3443       }
 3444       break;
 3445     case Op_VectorCastB2X:
 3446     case Op_VectorCastS2X:
 3447     case Op_VectorCastI2X:
 3448       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3449         return false;
 3450       }
 3451       break;
 3452     case Op_VectorCastL2X:
 3453       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3454         return false;
 3455       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3456         return false;
 3457       }
 3458       break;
 3459     case Op_VectorCastF2X: {
 3460         // As per JLS section 5.1.3 narrowing conversion to sub-word types
 3461         // happen after intermediate conversion to integer and special handling
 3462         // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
 3463         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3464         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3465           return false;
 3466         }
 3467       }
 3468       // fallthrough
 3469     case Op_VectorCastD2X:
 3470       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3471         return false;
 3472       }
 3473       break;
 3474     case Op_VectorCastF2HF:
 3475     case Op_VectorCastHF2F:
 3476       if (!VM_Version::supports_f16c() &&
 3477          ((!VM_Version::supports_evex() ||
 3478          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3479         return false;
 3480       }
 3481       break;
 3482     case Op_RoundVD:
 3483       if (!VM_Version::supports_avx512dq()) {
 3484         return false;
 3485       }
 3486       break;
 3487     case Op_MulReductionVI:
 3488       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3489         return false;
 3490       }
 3491       break;
 3492     case Op_LoadVectorGatherMasked:
 3493       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3494         return false;
 3495       }
 3496       if (is_subword_type(bt) &&
 3497          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3498           (size_in_bits < 64)                                      ||
 3499           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3500         return false;
 3501       }
 3502       break;
 3503     case Op_StoreVectorScatterMasked:
 3504     case Op_StoreVectorScatter:
 3505       if (is_subword_type(bt)) {
 3506         return false;
 3507       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3508         return false;
 3509       }
 3510       // fallthrough
 3511     case Op_LoadVectorGather:
 3512       if (!is_subword_type(bt) && size_in_bits == 64) {
 3513         return false;
 3514       }
 3515       if (is_subword_type(bt) && size_in_bits < 64) {
 3516         return false;
 3517       }
 3518       break;
 3519     case Op_SaturatingAddV:
 3520     case Op_SaturatingSubV:
 3521       if (UseAVX < 1) {
 3522         return false; // Implementation limitation
 3523       }
 3524       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3525         return false;
 3526       }
 3527       break;
 3528     case Op_SelectFromTwoVector:
 3529        if (size_in_bits < 128) {
 3530          return false;
 3531        }
 3532        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3533          return false;
 3534        }
 3535        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3536          return false;
 3537        }
 3538        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3539          return false;
 3540        }
 3541        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3542          return false;
 3543        }
 3544        break;
 3545     case Op_MaskAll:
 3546       if (!VM_Version::supports_evex()) {
 3547         return false;
 3548       }
 3549       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3550         return false;
 3551       }
 3552       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3553         return false;
 3554       }
 3555       break;
 3556     case Op_VectorMaskCmp:
 3557       if (vlen < 2 || size_in_bits < 32) {
 3558         return false;
 3559       }
 3560       break;
 3561     case Op_CompressM:
 3562       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3563         return false;
 3564       }
 3565       break;
 3566     case Op_CompressV:
 3567     case Op_ExpandV:
 3568       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3569         return false;
 3570       }
 3571       if (size_in_bits < 128 ) {
 3572         return false;
 3573       }
 3574     case Op_VectorLongToMask:
 3575       if (UseAVX < 1) {
 3576         return false;
 3577       }
 3578       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3579         return false;
 3580       }
 3581       break;
 3582     case Op_SignumVD:
 3583     case Op_SignumVF:
 3584       if (UseAVX < 1) {
 3585         return false;
 3586       }
 3587       break;
 3588     case Op_PopCountVI:
 3589     case Op_PopCountVL: {
 3590         if (!is_pop_count_instr_target(bt) &&
 3591             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3592           return false;
 3593         }
 3594       }
 3595       break;
 3596     case Op_ReverseV:
 3597     case Op_ReverseBytesV:
 3598       if (UseAVX < 2) {
 3599         return false;
 3600       }
 3601       break;
 3602     case Op_CountTrailingZerosV:
 3603     case Op_CountLeadingZerosV:
 3604       if (UseAVX < 2) {
 3605         return false;
 3606       }
 3607       break;
 3608   }
 3609   return true;  // Per default match rules are supported.
 3610 }
 3611 
 3612 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3613   // ADLC based match_rule_supported routine checks for the existence of pattern based
 3614   // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
 3615   // of their non-masked counterpart with mask edge being the differentiator.
 3616   // This routine does a strict check on the existence of masked operation patterns
 3617   // by returning a default false value for all the other opcodes apart from the
 3618   // ones whose masked instruction patterns are defined in this file.
 3619   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3620     return false;
 3621   }
 3622 
 3623   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3624   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3625     return false;
 3626   }
 3627   switch(opcode) {
 3628     // Unary masked operations
 3629     case Op_AbsVB:
 3630     case Op_AbsVS:
 3631       if(!VM_Version::supports_avx512bw()) {
 3632         return false;  // Implementation limitation
 3633       }
 3634     case Op_AbsVI:
 3635     case Op_AbsVL:
 3636       return true;
 3637 
 3638     // Ternary masked operations
 3639     case Op_FmaVF:
 3640     case Op_FmaVD:
 3641       return true;
 3642 
 3643     case Op_MacroLogicV:
 3644       if(bt != T_INT && bt != T_LONG) {
 3645         return false;
 3646       }
 3647       return true;
 3648 
 3649     // Binary masked operations
 3650     case Op_AddVB:
 3651     case Op_AddVS:
 3652     case Op_SubVB:
 3653     case Op_SubVS:
 3654     case Op_MulVS:
 3655     case Op_LShiftVS:
 3656     case Op_RShiftVS:
 3657     case Op_URShiftVS:
 3658       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3659       if (!VM_Version::supports_avx512bw()) {
 3660         return false;  // Implementation limitation
 3661       }
 3662       return true;
 3663 
 3664     case Op_MulVL:
 3665       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3666       if (!VM_Version::supports_avx512dq()) {
 3667         return false;  // Implementation limitation
 3668       }
 3669       return true;
 3670 
 3671     case Op_AndV:
 3672     case Op_OrV:
 3673     case Op_XorV:
 3674     case Op_RotateRightV:
 3675     case Op_RotateLeftV:
 3676       if (bt != T_INT && bt != T_LONG) {
 3677         return false; // Implementation limitation
 3678       }
 3679       return true;
 3680 
 3681     case Op_VectorLoadMask:
 3682       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3683       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3684         return false;
 3685       }
 3686       return true;
 3687 
 3688     case Op_AddVI:
 3689     case Op_AddVL:
 3690     case Op_AddVF:
 3691     case Op_AddVD:
 3692     case Op_SubVI:
 3693     case Op_SubVL:
 3694     case Op_SubVF:
 3695     case Op_SubVD:
 3696     case Op_MulVI:
 3697     case Op_MulVF:
 3698     case Op_MulVD:
 3699     case Op_DivVF:
 3700     case Op_DivVD:
 3701     case Op_SqrtVF:
 3702     case Op_SqrtVD:
 3703     case Op_LShiftVI:
 3704     case Op_LShiftVL:
 3705     case Op_RShiftVI:
 3706     case Op_RShiftVL:
 3707     case Op_URShiftVI:
 3708     case Op_URShiftVL:
 3709     case Op_LoadVectorMasked:
 3710     case Op_StoreVectorMasked:
 3711     case Op_LoadVectorGatherMasked:
 3712     case Op_StoreVectorScatterMasked:
 3713       return true;
 3714 
 3715     case Op_UMinV:
 3716     case Op_UMaxV:
 3717       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3718         return false;
 3719       } // fallthrough
 3720     case Op_MaxV:
 3721     case Op_MinV:
 3722       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3723         return false; // Implementation limitation
 3724       }
 3725       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3726         return false; // Implementation limitation
 3727       }
 3728       return true;
 3729     case Op_SaturatingAddV:
 3730     case Op_SaturatingSubV:
 3731       if (!is_subword_type(bt)) {
 3732         return false;
 3733       }
 3734       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3735         return false; // Implementation limitation
 3736       }
 3737       return true;
 3738 
 3739     case Op_VectorMaskCmp:
 3740       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3741         return false; // Implementation limitation
 3742       }
 3743       return true;
 3744 
 3745     case Op_VectorRearrange:
 3746       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3747         return false; // Implementation limitation
 3748       }
 3749       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3750         return false; // Implementation limitation
 3751       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3752         return false; // Implementation limitation
 3753       }
 3754       return true;
 3755 
 3756     // Binary Logical operations
 3757     case Op_AndVMask:
 3758     case Op_OrVMask:
 3759     case Op_XorVMask:
 3760       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3761         return false; // Implementation limitation
 3762       }
 3763       return true;
 3764 
 3765     case Op_PopCountVI:
 3766     case Op_PopCountVL:
 3767       if (!is_pop_count_instr_target(bt)) {
 3768         return false;
 3769       }
 3770       return true;
 3771 
 3772     case Op_MaskAll:
 3773       return true;
 3774 
 3775     case Op_CountLeadingZerosV:
 3776       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3777         return true;
 3778       }
 3779     default:
 3780       return false;
 3781   }
 3782 }
 3783 
 3784 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3785   return false;
 3786 }
 3787 
 3788 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3789 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3790   switch (elem_bt) {
 3791     case T_BYTE:  return false;
 3792     case T_SHORT: return !VM_Version::supports_avx512bw();
 3793     case T_INT:   return !VM_Version::supports_avx();
 3794     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3795     default:
 3796       ShouldNotReachHere();
 3797       return false;
 3798   }
 3799 }
 3800 
 3801 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3802   // Prefer predicate if the mask type is "TypePVectMask".
 3803   return vt->isa_pvectmask() != nullptr;
 3804 }
 3805 
 3806 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3807   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3808   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3809   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3810       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3811     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3812     return new legVecZOper();
 3813   }
 3814   if (legacy) {
 3815     switch (ideal_reg) {
 3816       case Op_VecS: return new legVecSOper();
 3817       case Op_VecD: return new legVecDOper();
 3818       case Op_VecX: return new legVecXOper();
 3819       case Op_VecY: return new legVecYOper();
 3820       case Op_VecZ: return new legVecZOper();
 3821     }
 3822   } else {
 3823     switch (ideal_reg) {
 3824       case Op_VecS: return new vecSOper();
 3825       case Op_VecD: return new vecDOper();
 3826       case Op_VecX: return new vecXOper();
 3827       case Op_VecY: return new vecYOper();
 3828       case Op_VecZ: return new vecZOper();
 3829     }
 3830   }
 3831   ShouldNotReachHere();
 3832   return nullptr;
 3833 }
 3834 
 3835 bool Matcher::is_reg2reg_move(MachNode* m) {
 3836   switch (m->rule()) {
 3837     case MoveVec2Leg_rule:
 3838     case MoveLeg2Vec_rule:
 3839     case MoveF2VL_rule:
 3840     case MoveF2LEG_rule:
 3841     case MoveVL2F_rule:
 3842     case MoveLEG2F_rule:
 3843     case MoveD2VL_rule:
 3844     case MoveD2LEG_rule:
 3845     case MoveVL2D_rule:
 3846     case MoveLEG2D_rule:
 3847       return true;
 3848     default:
 3849       return false;
 3850   }
 3851 }
 3852 
 3853 bool Matcher::is_generic_vector(MachOper* opnd) {
 3854   switch (opnd->opcode()) {
 3855     case VEC:
 3856     case LEGVEC:
 3857       return true;
 3858     default:
 3859       return false;
 3860   }
 3861 }
 3862 
 3863 //------------------------------------------------------------------------
 3864 
 3865 const RegMask* Matcher::predicate_reg_mask(void) {
 3866   return &_VECTMASK_REG_mask;
 3867 }
 3868 
 3869 // Max vector size in bytes. 0 if not supported.
 3870 int Matcher::vector_width_in_bytes(BasicType bt) {
 3871   assert(is_java_primitive(bt), "only primitive type vectors");
 3872   // SSE2 supports 128bit vectors for all types.
 3873   // AVX2 supports 256bit vectors for all types.
 3874   // AVX2/EVEX supports 512bit vectors for all types.
 3875   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3876   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3877   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3878     size = (UseAVX > 2) ? 64 : 32;
 3879   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3880     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3881   // Use flag to limit vector size.
 3882   size = MIN2(size,(int)MaxVectorSize);
 3883   // Minimum 2 values in vector (or 4 for bytes).
 3884   switch (bt) {
 3885   case T_DOUBLE:
 3886   case T_LONG:
 3887     if (size < 16) return 0;
 3888     break;
 3889   case T_FLOAT:
 3890   case T_INT:
 3891     if (size < 8) return 0;
 3892     break;
 3893   case T_BOOLEAN:
 3894     if (size < 4) return 0;
 3895     break;
 3896   case T_CHAR:
 3897     if (size < 4) return 0;
 3898     break;
 3899   case T_BYTE:
 3900     if (size < 4) return 0;
 3901     break;
 3902   case T_SHORT:
 3903     if (size < 4) return 0;
 3904     break;
 3905   default:
 3906     ShouldNotReachHere();
 3907   }
 3908   return size;
 3909 }
 3910 
 3911 // Limits on vector size (number of elements) loaded into vector.
 3912 int Matcher::max_vector_size(const BasicType bt) {
 3913   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3914 }
 3915 int Matcher::min_vector_size(const BasicType bt) {
 3916   int max_size = max_vector_size(bt);
 3917   // Min size which can be loaded into vector is 4 bytes.
 3918   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3919   // Support for calling svml double64 vectors
 3920   if (bt == T_DOUBLE) {
 3921     size = 1;
 3922   }
 3923   return MIN2(size,max_size);
 3924 }
 3925 
 3926 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3927   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3928   // by default on Cascade Lake
 3929   if (VM_Version::is_default_intel_cascade_lake()) {
 3930     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3931   }
 3932   return Matcher::max_vector_size(bt);
 3933 }
 3934 
 3935 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3936   return -1;
 3937 }
 3938 
 3939 // Vector ideal reg corresponding to specified size in bytes
 3940 uint Matcher::vector_ideal_reg(int size) {
 3941   assert(MaxVectorSize >= size, "");
 3942   switch(size) {
 3943     case  4: return Op_VecS;
 3944     case  8: return Op_VecD;
 3945     case 16: return Op_VecX;
 3946     case 32: return Op_VecY;
 3947     case 64: return Op_VecZ;
 3948   }
 3949   ShouldNotReachHere();
 3950   return 0;
 3951 }
 3952 
 3953 // Check for shift by small constant as well
 3954 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3955   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3956       shift->in(2)->get_int() <= 3 &&
 3957       // Are there other uses besides address expressions?
 3958       !matcher->is_visited(shift)) {
 3959     address_visited.set(shift->_idx); // Flag as address_visited
 3960     mstack.push(shift->in(2), Matcher::Visit);
 3961     Node *conv = shift->in(1);
 3962     // Allow Matcher to match the rule which bypass
 3963     // ConvI2L operation for an array index on LP64
 3964     // if the index value is positive.
 3965     if (conv->Opcode() == Op_ConvI2L &&
 3966         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3967         // Are there other uses besides address expressions?
 3968         !matcher->is_visited(conv)) {
 3969       address_visited.set(conv->_idx); // Flag as address_visited
 3970       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3971     } else {
 3972       mstack.push(conv, Matcher::Pre_Visit);
 3973     }
 3974     return true;
 3975   }
 3976   return false;
 3977 }
 3978 
 3979 // This function identifies sub-graphs in which a 'load' node is
 3980 // input to two different nodes, and such that it can be matched
 3981 // with BMI instructions like blsi, blsr, etc.
 3982 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3983 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3984 // refers to the same node.
 3985 //
 3986 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3987 // This is a temporary solution until we make DAGs expressible in ADL.
 3988 template<typename ConType>
 3989 class FusedPatternMatcher {
 3990   Node* _op1_node;
 3991   Node* _mop_node;
 3992   int _con_op;
 3993 
 3994   static int match_next(Node* n, int next_op, int next_op_idx) {
 3995     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3996       return -1;
 3997     }
 3998 
 3999     if (next_op_idx == -1) { // n is commutative, try rotations
 4000       if (n->in(1)->Opcode() == next_op) {
 4001         return 1;
 4002       } else if (n->in(2)->Opcode() == next_op) {
 4003         return 2;
 4004       }
 4005     } else {
 4006       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 4007       if (n->in(next_op_idx)->Opcode() == next_op) {
 4008         return next_op_idx;
 4009       }
 4010     }
 4011     return -1;
 4012   }
 4013 
 4014  public:
 4015   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4016     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4017 
 4018   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4019              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4020              typename ConType::NativeType con_value) {
 4021     if (_op1_node->Opcode() != op1) {
 4022       return false;
 4023     }
 4024     if (_mop_node->outcnt() > 2) {
 4025       return false;
 4026     }
 4027     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4028     if (op1_op2_idx == -1) {
 4029       return false;
 4030     }
 4031     // Memory operation must be the other edge
 4032     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4033 
 4034     // Check that the mop node is really what we want
 4035     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4036       Node* op2_node = _op1_node->in(op1_op2_idx);
 4037       if (op2_node->outcnt() > 1) {
 4038         return false;
 4039       }
 4040       assert(op2_node->Opcode() == op2, "Should be");
 4041       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4042       if (op2_con_idx == -1) {
 4043         return false;
 4044       }
 4045       // Memory operation must be the other edge
 4046       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4047       // Check that the memory operation is the same node
 4048       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4049         // Now check the constant
 4050         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4051         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4052           return true;
 4053         }
 4054       }
 4055     }
 4056     return false;
 4057   }
 4058 };
 4059 
 4060 static bool is_bmi_pattern(Node* n, Node* m) {
 4061   assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
 4062   if (n != nullptr && m != nullptr) {
 4063     if (m->Opcode() == Op_LoadI) {
 4064       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4065       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4066              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4067              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4068     } else if (m->Opcode() == Op_LoadL) {
 4069       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4070       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4071              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4072              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4073     }
 4074   }
 4075   return false;
 4076 }
 4077 
 4078 // Should the matcher clone input 'm' of node 'n'?
 4079 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4080   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4081   if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
 4082     mstack.push(m, Visit);
 4083     return true;
 4084   }
 4085   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4086     mstack.push(m, Visit);           // m = ShiftCntV
 4087     return true;
 4088   }
 4089   if (is_encode_and_store_pattern(n, m)) {
 4090     mstack.push(m, Visit);
 4091     return true;
 4092   }
 4093   return false;
 4094 }
 4095 
 4096 // Should the Matcher clone shifts on addressing modes, expecting them
 4097 // to be subsumed into complex addressing expressions or compute them
 4098 // into registers?
 4099 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4100   Node *off = m->in(AddPNode::Offset);
 4101   if (off->is_Con()) {
 4102     address_visited.test_set(m->_idx); // Flag as address_visited
 4103     Node *adr = m->in(AddPNode::Address);
 4104 
 4105     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4106     // AtomicAdd is not an addressing expression.
 4107     // Cheap to find it by looking for screwy base.
 4108     if (adr->is_AddP() &&
 4109         !adr->in(AddPNode::Base)->is_top() &&
 4110         !adr->in(AddPNode::Offset)->is_Con() &&
 4111         off->get_long() == (int) (off->get_long()) && // immL32
 4112         // Are there other uses besides address expressions?
 4113         !is_visited(adr)) {
 4114       address_visited.set(adr->_idx); // Flag as address_visited
 4115       Node *shift = adr->in(AddPNode::Offset);
 4116       if (!clone_shift(shift, this, mstack, address_visited)) {
 4117         mstack.push(shift, Pre_Visit);
 4118       }
 4119       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4120       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4121     } else {
 4122       mstack.push(adr, Pre_Visit);
 4123     }
 4124 
 4125     // Clone X+offset as it also folds into most addressing expressions
 4126     mstack.push(off, Visit);
 4127     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4128     return true;
 4129   } else if (clone_shift(off, this, mstack, address_visited)) {
 4130     address_visited.test_set(m->_idx); // Flag as address_visited
 4131     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4132     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4133     return true;
 4134   }
 4135   return false;
 4136 }
 4137 
 4138 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4139   switch (bt) {
 4140     case BoolTest::eq:
 4141       return Assembler::eq;
 4142     case BoolTest::ne:
 4143       return Assembler::neq;
 4144     case BoolTest::le:
 4145     case BoolTest::ule:
 4146       return Assembler::le;
 4147     case BoolTest::ge:
 4148     case BoolTest::uge:
 4149       return Assembler::nlt;
 4150     case BoolTest::lt:
 4151     case BoolTest::ult:
 4152       return Assembler::lt;
 4153     case BoolTest::gt:
 4154     case BoolTest::ugt:
 4155       return Assembler::nle;
 4156     default : ShouldNotReachHere(); return Assembler::_false;
 4157   }
 4158 }
 4159 
 4160 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4161   switch (bt) {
 4162   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4163   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4164   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4165   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4166   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4167   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4168   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4169   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4170   }
 4171 }
 4172 
 4173 // Helper methods for MachSpillCopyNode::implementation().
 4174 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4175                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4176   assert(ireg == Op_VecS || // 32bit vector
 4177          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4178           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4179          "no non-adjacent vector moves" );
 4180   if (masm) {
 4181     switch (ireg) {
 4182     case Op_VecS: // copy whole register
 4183     case Op_VecD:
 4184     case Op_VecX:
 4185       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4186         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4187       } else {
 4188         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4189      }
 4190       break;
 4191     case Op_VecY:
 4192       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4193         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4194       } else {
 4195         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4196      }
 4197       break;
 4198     case Op_VecZ:
 4199       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4200       break;
 4201     default:
 4202       ShouldNotReachHere();
 4203     }
 4204 #ifndef PRODUCT
 4205   } else {
 4206     switch (ireg) {
 4207     case Op_VecS:
 4208     case Op_VecD:
 4209     case Op_VecX:
 4210       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4211       break;
 4212     case Op_VecY:
 4213     case Op_VecZ:
 4214       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4215       break;
 4216     default:
 4217       ShouldNotReachHere();
 4218     }
 4219 #endif
 4220   }
 4221 }
 4222 
 4223 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4224                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4225   if (masm) {
 4226     if (is_load) {
 4227       switch (ireg) {
 4228       case Op_VecS:
 4229         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4230         break;
 4231       case Op_VecD:
 4232         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4233         break;
 4234       case Op_VecX:
 4235         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4236           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4237         } else {
 4238           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4239           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4240         }
 4241         break;
 4242       case Op_VecY:
 4243         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4244           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4245         } else {
 4246           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4247           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4248         }
 4249         break;
 4250       case Op_VecZ:
 4251         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4252         break;
 4253       default:
 4254         ShouldNotReachHere();
 4255       }
 4256     } else { // store
 4257       switch (ireg) {
 4258       case Op_VecS:
 4259         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4260         break;
 4261       case Op_VecD:
 4262         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4263         break;
 4264       case Op_VecX:
 4265         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4266           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4267         }
 4268         else {
 4269           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4270         }
 4271         break;
 4272       case Op_VecY:
 4273         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4274           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4275         }
 4276         else {
 4277           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4278         }
 4279         break;
 4280       case Op_VecZ:
 4281         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4282         break;
 4283       default:
 4284         ShouldNotReachHere();
 4285       }
 4286     }
 4287 #ifndef PRODUCT
 4288   } else {
 4289     if (is_load) {
 4290       switch (ireg) {
 4291       case Op_VecS:
 4292         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4293         break;
 4294       case Op_VecD:
 4295         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4296         break;
 4297        case Op_VecX:
 4298         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4299         break;
 4300       case Op_VecY:
 4301       case Op_VecZ:
 4302         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4303         break;
 4304       default:
 4305         ShouldNotReachHere();
 4306       }
 4307     } else { // store
 4308       switch (ireg) {
 4309       case Op_VecS:
 4310         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4311         break;
 4312       case Op_VecD:
 4313         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4314         break;
 4315        case Op_VecX:
 4316         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4317         break;
 4318       case Op_VecY:
 4319       case Op_VecZ:
 4320         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4321         break;
 4322       default:
 4323         ShouldNotReachHere();
 4324       }
 4325     }
 4326 #endif
 4327   }
 4328 }
 4329 
 4330 template <class T>
 4331 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4332   int size = type2aelembytes(bt) * len;
 4333   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4334   for (int i = 0; i < len; i++) {
 4335     int offset = i * type2aelembytes(bt);
 4336     switch (bt) {
 4337       case T_BYTE: val->at(i) = con; break;
 4338       case T_SHORT: {
 4339         jshort c = con;
 4340         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4341         break;
 4342       }
 4343       case T_INT: {
 4344         jint c = con;
 4345         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4346         break;
 4347       }
 4348       case T_LONG: {
 4349         jlong c = con;
 4350         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4351         break;
 4352       }
 4353       case T_FLOAT: {
 4354         jfloat c = con;
 4355         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4356         break;
 4357       }
 4358       case T_DOUBLE: {
 4359         jdouble c = con;
 4360         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4361         break;
 4362       }
 4363       default: assert(false, "%s", type2name(bt));
 4364     }
 4365   }
 4366   return val;
 4367 }
 4368 
 4369 static inline jlong high_bit_set(BasicType bt) {
 4370   switch (bt) {
 4371     case T_BYTE:  return 0x8080808080808080;
 4372     case T_SHORT: return 0x8000800080008000;
 4373     case T_INT:   return 0x8000000080000000;
 4374     case T_LONG:  return 0x8000000000000000;
 4375     default:
 4376       ShouldNotReachHere();
 4377       return 0;
 4378   }
 4379 }
 4380 
 4381 #ifndef PRODUCT
 4382   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {  // non-PRODUCT only: textual form of the padding nop
 4383     st->print("nop \t# %d bytes pad for loops and calls", _count);  // _count is reported as the pad size in bytes
 4384   }
 4385 #endif
 4386 
 4387   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {  // emits the padding itself
 4388     __ nop(_count);  // _count is the same value that size() reports for this node
 4389   }
 4390 
 4391   uint MachNopNode::size(PhaseRegAlloc*) const {  // size of this node is exactly the requested pad count
 4392     return _count;
 4393   }
 4394 
 4395 #ifndef PRODUCT
 4396   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {  // non-PRODUCT only: textual form of the breakpoint
 4397     st->print("# breakpoint");
 4398   }
 4399 #endif
 4400 
 4401   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {  // ra_ is not used here
 4402     __ int3();  // the breakpoint is a single int3 instruction
 4403   }
 4404 
 4405   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {  // no hard-coded size: defers to the generic MachNode computation
 4406     return MachNode::size(ra_);
 4407   }
 4408 
 4409 %}
 4410 
 4411 //----------ENCODING BLOCK-----------------------------------------------------
 4412 // This block specifies the encoding classes used by the compiler to
 4413 // output byte streams.  Encoding classes are parameterized macros
 4414 // used by Machine Instruction Nodes in order to generate the bit
 4415 // encoding of the instruction.  Operands specify their base encoding
 4416 // interface with the interface keyword.  Four interfaces are
 4417 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
 4418 // COND_INTER.  REG_INTER causes an operand to generate a function
 4419 // which returns its register number when queried.  CONST_INTER causes
 4420 // an operand to generate a function which returns the value of the
 4421 // constant when queried.  MEMORY_INTER causes an operand to generate
 4422 // four functions which return the Base Register, the Index Register,
 4423 // the Scale Value, and the Offset Value of the operand when queried.
 4424 // COND_INTER causes an operand to generate six functions which return
 4425 // the encoding code (ie - encoding bits for the instruction)
 4426 // associated with each basic boolean condition for a conditional
 4427 // instruction.
 4428 //
 4429 // Instructions specify two basic values for encoding.  Again, a
 4430 // function is available to check if the constant displacement is an
 4431 // oop. They use the ins_encode keyword to specify their encoding
 4432 // classes (which must be a sequence of enc_class names, and their
 4433 // parameters, specified in the encoding block), and they use the
 4434 // opcode keyword to specify, in order, their primary, secondary, and
 4435 // tertiary opcode.  Only the opcode sections which a particular
 4436 // instruction needs for encoding need to be specified.
 4437 encode %{
 4438   enc_class cdql_enc(no_rax_rdx_RegI div)
 4439   %{
 4440     // Full implementation of Java idiv and irem; checks for
 4441     // special case as described in JVM spec., p.243 & p.271.
 4442     //
 4443     //         normal case                           special case
 4444     //
 4445     // input : rax: dividend                         min_int
 4446     //         reg: divisor                          -1
 4447     //
 4448     // output: rax: quotient  (= rax idiv reg)       min_int
 4449     //         rdx: remainder (= rax irem reg)       0
 4450     //
 4451     //  Code sequence:
 4452     //
 4453     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4454     //    5:   75 07/08                jne    e <normal>
 4455     //    7:   33 d2                   xor    %edx,%edx
 4456     //  [div >= 8 -> offset + 1]
 4457     //  [REX_B]
 4458     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4459     //    c:   74 03/04                je     11 <done>
 4460     // 000000000000000e <normal>:
 4461     //    e:   99                      cltd
 4462     //  [div >= 8 -> offset + 1]
 4463     //  [REX_B]
 4464     //    f:   f7 f9                   idiv   $div
 4465     // 0000000000000011 <done>:
 4466     Label normal;
 4467     Label done;
 4468 
 4469     // cmp    $0x80000000,%eax  (is the dividend min_int?)
 4470     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4471 
 4472     // jne    e <normal>
 4473     __ jccb(Assembler::notEqual, normal);
 4474 
 4475     // xor    %edx,%edx  (remainder for the special case is 0)
 4476     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4477 
 4478     // cmp    $0xffffffffffffffff,$div  (is the divisor -1?)
 4479     __ cmpl($div$$Register, -1);
 4480 
 4481     // je     11 <done>  (special case: rax already holds min_int, skip the idiv)
 4482     __ jccb(Assembler::equal, done);
 4483 
 4484     // <normal>
 4485     // cltd
 4486     __ bind(normal);
 4487     __ cdql();
 4488 
 4489     // idivl  $div
 4490     // <done>
 4491     __ idivl($div$$Register);
 4492     __ bind(done);
 4493   %}
 4494 
 4495   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4496   %{
 4497     // Full implementation of Java ldiv and lrem; checks for
 4498     // special case as described in JVM spec., p.243 & p.271.
 4499     //
 4500     //         normal case                           special case
 4501     //
 4502     // input : rax: dividend                         min_long
 4503     //         reg: divisor                          -1
 4504     //
 4505     // output: rax: quotient  (= rax idiv reg)       min_long
 4506     //         rdx: remainder (= rax irem reg)       0
 4507     //
 4508     //  Code sequence:
 4509     //
 4510     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4511     //    7:   00 00 80
 4512     //    a:   48 39 d0                cmp    %rdx,%rax
 4513     //    d:   75 08                   jne    17 <normal>
 4514     //    f:   33 d2                   xor    %edx,%edx
 4515     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4516     //   15:   74 05                   je     1c <done>
 4517     // 0000000000000017 <normal>:
 4518     //   17:   48 99                   cqto
 4519     //   19:   48 f7 f9                idiv   $div
 4520     // 000000000000001c <done>:
 4521     Label normal;
 4522     Label done;
 4523 
 4524     // mov    $0x8000000000000000,%rdx  (rdx is used as scratch for min_long)
 4525     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4526 
 4527     // cmp    %rdx,%rax  (is the dividend min_long?)
 4528     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4529 
 4530     // jne    17 <normal>
 4531     __ jccb(Assembler::notEqual, normal);
 4532 
 4533     // xor    %edx,%edx  (remainder for the special case is 0)
 4534     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4535 
 4536     // cmp    $0xffffffffffffffff,$div  (is the divisor -1?)
 4537     __ cmpq($div$$Register, -1);
 4538 
 4539     // je     1c <done>  (special case: rax already holds min_long, skip the idiv)
 4540     __ jccb(Assembler::equal, done);
 4541 
 4542     // <normal>
 4543     // cqto
 4544     __ bind(normal);
 4545     __ cdqq();
 4546 
 4547     // idivq  $div (emitted here, right before <done> is bound)
 4548     // <done>
 4549     __ idivq($div$$Register);
 4550     __ bind(done);
 4551   %}
 4552 
 4553   enc_class clear_avx %{
 4554     DEBUG_ONLY(int off0 = __ offset());  // code offset before; compared against off1 in the size assert below
 4555     if (generate_vzeroupper(Compile::current())) {
 4556       // vzeroupper is emitted only when generate_vzeroupper() asks for it:
 4557       // Clear upper bits of YMM registers when current compiled code uses
 4558       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4559       __ vzeroupper();
 4560     }
 4561     DEBUG_ONLY(int off1 = __ offset());  // code offset after
 4562     assert(off1 - off0 == clear_avx_size(), "correct size prediction");  // emitted bytes must match clear_avx_size()
 4563   %}
 4564 
 4565   enc_class Java_To_Runtime(method meth) %{  // call to the runtime entry point $meth, made indirectly through r10
 4566     __ lea(r10, RuntimeAddress((address)$meth$$method));  // r10 = address of the target
 4567     __ call(r10);
 4568     __ post_call_nop();  // every call encoding in this block ends with post_call_nop()
 4569   %}
 4570 
 4571   enc_class Java_Static_Call(method meth)
 4572   %{
 4573     // JAVA STATIC CALL
 4574     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4575     // determine who we intended to call.
 4576     if (!_method) {  // no Java method attached to this call node: plain call to the runtime address
 4577       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4578     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4579       // The NOP here is purely to ensure that eliding a call to
 4580       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4581       __ nop(5);
 4582       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4583     } else {
 4584       int method_index = resolved_method_index(masm);
 4585       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)  // relocation kind depends on _optimized_virtual
 4586                                                   : static_call_Relocation::spec(method_index);
 4587       address mark = __ pc();  // pc of the call; passed to emit_to_interp_stub() below
 4588       int call_offset = __ offset();  // offset of the call; passed to shared_stub_to_interp_for() below
 4589       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4590       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4591         // Calls of the same statically bound method can share
 4592         // a stub to the interpreter.
 4593         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4594       } else {
 4595         // Emit stubs for static call.
 4596         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4597         __ clear_inst_mark();
 4598         if (stub == nullptr) {  // stub emission failed: record the failure and stop emitting (no post_call_nop)
 4599           ciEnv::current()->record_failure("CodeCache is full");
 4600           return;
 4601         }
 4602       }
 4603     }
 4604     __ post_call_nop();
 4605   %}
 4606 
 4607   enc_class Java_Dynamic_Call(method meth) %{  // call through ic_call() (presumably an inline-cache call -- confirm in MacroAssembler)
 4608     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4609     __ post_call_nop();
 4610   %}
 4611 
 4612   enc_class call_epilog %{  // code emitted after a call: optional stack check, then inline-type return fix-ups
 4613     if (VerifyStackAtCalls) {
 4614       // Check that stack depth is unchanged: find magic cookie on stack
 4615       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4616       Label L;
 4617       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);  // 0xbadb100d is the cookie value expected at that slot
 4618       __ jccb(Assembler::equal, L);
 4619       // Die if stack mismatch
 4620       __ int3();
 4621       __ bind(L);
 4622     }
 4623     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4624       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4625       // Search for the corresponding projection, get the register and emit code that initializes it.
 4626       uint con = (tf()->range_cc()->cnt() - 1);  // projection number of that last return value
 4627       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4628         ProjNode* proj = fast_out(i)->as_Proj();
 4629         if (proj->_con == con) {
 4630           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4631           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4632           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4633           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;  // not in a register: build the value in rscratch1
 4634           __ testq(rax, rax);
 4635           __ setb(Assembler::notZero, toReg);  // low byte = (rax != 0)
 4636           __ movzbl(toReg, toReg);  // zero-extend that byte
 4637           if (reg->is_stack()) {  // marker lives on the stack: store the value computed in toReg
 4638             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4639             __ movq(Address(rsp, st_off), toReg);
 4640           }
 4641           break;
 4642         }
 4643       }
 4644       if (return_value_is_used()) {
 4645         // An inline type is returned as fields in multiple registers.
 4646         // Rax either contains an oop if the inline type is buffered or a pointer
 4647         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4648         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4649         // rax &= (rax & 1) - 1
 4650         __ movptr(rscratch1, rax);
 4651         __ andptr(rscratch1, 0x1);  // rscratch1 = rax & 1
 4652         __ subptr(rscratch1, 0x1);  // 0 if the low bit was set, all ones otherwise
 4653         __ andptr(rax, rscratch1);
 4654       }
 4655     }
 4656   %}
 4657 
 4658 %}
 4659 
 4660 //----------FRAME--------------------------------------------------------------
 4661 // Definition of frame structure and management information.
 4662 //
 4663 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4664 //                             |   (to get allocators register number
 4665 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4666 //  r   CALLER     |        |
 4667 //  o     |        +--------+      pad to even-align allocators stack-slot
 4668 //  w     V        |  pad0  |        numbers; owned by CALLER
 4669 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4670 //  h     ^        |   in   |  5
 4671 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4672 //  |     |        |        |  3
 4673 //  |     |        +--------+
 4674 //  V     |        | old out|      Empty on Intel, window on Sparc
 4675 //        |    old |preserve|      Must be even aligned.
 4676 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4677 //        |        |   in   |  3   area for Intel ret address
 4678 //     Owned by    |preserve|      Empty on Sparc.
 4679 //       SELF      +--------+
 4680 //        |        |  pad2  |  2   pad to align old SP
 4681 //        |        +--------+  1
 4682 //        |        | locks  |  0
 4683 //        |        +--------+----> OptoReg::stack0(), even aligned
 4684 //        |        |  pad1  | 11   pad to align new SP
 4685 //        |        +--------+
 4686 //        |        |        | 10
 4687 //        |        | spills |  9   spills
 4688 //        V        |        |  8   (pad0 slot for callee)
 4689 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4690 //        ^        |  out   |  7
 4691 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4692 //     Owned by    +--------+
 4693 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4694 //        |    new |preserve|      Must be even-aligned.
 4695 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4696 //        |        |        |
 4697 //
 4698 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4699 //         known from SELF's arguments and the Java calling convention.
 4700 //         Region 6-7 is determined per call site.
 4701 // Note 2: If the calling convention leaves holes in the incoming argument
 4702 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4703 //         are owned by the CALLEE.  Holes should not be necessary in the
 4704 //         incoming area, as the Java calling convention is completely under
 4705 //         the control of the AD file.  Doubles can be sorted and packed to
 4706 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4707 //         varargs C calling conventions.
 4708 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4709 //         even aligned with pad0 as needed.
 4710 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4711 //         region 6-11 is even aligned; it may be padded out more so that
 4712 //         the region from SP to FP meets the minimum stack alignment.
 4713 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4714 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4715 //         SP meets the minimum alignment.
 4716 
 4717 frame
 4718 %{
 4719   // This register defines part of the calling convention
 4720   // between compiled code and the interpreter.
 4721   inline_cache_reg(RAX);                // Inline Cache Register
 4722 
 4723   // Optional: name the operand used by cisc-spilling to access
 4724   // [stack_pointer + offset]
 4725   cisc_spilling_operand_name(indOffset32);
 4726 
 4727   // Number of stack slots consumed by locking an object
 4728   sync_stack_slots(2);
 4729 
 4730   // Compiled code's Frame Pointer
 4731   frame_pointer(RSP);
 4732 
 4733   // Stack alignment requirement
 4734   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4735 
 4736   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4737   // for calls to C.  Supports the var-args backing area for register parms.
 4738   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4739 
 4740   // The after-PROLOG location of the return address.  Location of
 4741   // return address specifies a type (REG or STACK) and a number
 4742   // representing the register number (i.e. - use a register name) or
 4743   // stack slot.
 4744   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4745   // Otherwise, it is above the locks and verification slot and alignment word
 4746   return_addr(STACK - 2 +
 4747               align_up((Compile::current()->in_preserve_stack_slots() +
 4748                         Compile::current()->fixed_slots()),
 4749                        stack_alignment_in_slots()));
 4750 
 4751   // Location of compiled Java return values.  Same as C for now.
 4752   return_value
 4753   %{
 4754     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4755            "only return normal values");
 4756 
 4757     static const int lo[Op_RegL + 1] = {  // first half of the returned pair, indexed by ideal_reg
 4758       0,
 4759       0,
 4760       RAX_num,  // Op_RegN
 4761       RAX_num,  // Op_RegI
 4762       RAX_num,  // Op_RegP
 4763       XMM0_num, // Op_RegF
 4764       XMM0_num, // Op_RegD
 4765       RAX_num   // Op_RegL
 4766     };
 4767     static const int hi[Op_RegL + 1] = {  // second half, indexed by ideal_reg; OptoReg::Bad where there is none
 4768       0,
 4769       0,
 4770       OptoReg::Bad, // Op_RegN
 4771       OptoReg::Bad, // Op_RegI
 4772       RAX_H_num,    // Op_RegP
 4773       OptoReg::Bad, // Op_RegF
 4774       XMM0b_num,    // Op_RegD
 4775       RAX_H_num     // Op_RegL
 4776     };
 4777     // Excluded flags and vector registers.
 4778     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4779     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4780   %}
 4781 %}
 4782 
 4783 //----------ATTRIBUTES---------------------------------------------------------
 4784 //----------Operand Attributes-------------------------------------------------
 4785 op_attrib op_cost(0);        // Required cost attribute; operands below set their own op_cost(...) where they need one
 4786 
 4787 //----------Instruction Attributes---------------------------------------------
 4788 ins_attrib ins_cost(100);       // Required cost attribute
 4789 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4790 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4791                                 // a non-matching short branch variant
 4792                                 // of some long branch?
 4793 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4794                                 // be a power of 2) specifies the
 4795                                 // alignment that some part of the
 4796                                 // instruction (not necessarily the
 4797                                 // start) requires.  If > 1, a
 4798                                 // compute_padding() function must be
 4799                                 // provided for the instruction
 4800 
 4801 // Whether this node is expanded during code emission into a sequence of
 4802 // instructions and the first instruction can perform an implicit null check.
 4803 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4804 
 4805 //----------OPERANDS-----------------------------------------------------------
 4806 // Operand definitions must precede instruction definitions for correct parsing
 4807 // in the ADLC because operands constitute user defined types which are used in
 4808 // instruction definitions.
 4809 
 4810 //----------Simple Operands----------------------------------------------------
 4811 // Immediate Operands
 4812 // Integer Immediate: any int constant (ConI)
 4813 operand immI()
 4814 %{
 4815   match(ConI);
 4816 
 4817   op_cost(10);
 4818   format %{ %}
 4819   interface(CONST_INTER);
 4820 %}
 4821 
 4822 // Constant for test vs zero: int constant 0
 4823 operand immI_0()
 4824 %{
 4825   predicate(n->get_int() == 0);
 4826   match(ConI);
 4827 
 4828   op_cost(0);
 4829   format %{ %}
 4830   interface(CONST_INTER);
 4831 %}
 4832 
 4833 // Constant for increment: int constant 1
 4834 operand immI_1()
 4835 %{
 4836   predicate(n->get_int() == 1);
 4837   match(ConI);
 4838 
 4839   op_cost(0);
 4840   format %{ %}
 4841   interface(CONST_INTER);
 4842 %}
 4843 
 4844 // Constant for decrement: int constant -1
 4845 operand immI_M1()
 4846 %{
 4847   predicate(n->get_int() == -1);
 4848   match(ConI);
 4849 
 4850   op_cost(0);
 4851   format %{ %}
 4852   interface(CONST_INTER);
 4853 %}
 4854 
 4855 operand immI_2()  // Int constant 2
 4856 %{
 4857   predicate(n->get_int() == 2);
 4858   match(ConI);
 4859 
 4860   op_cost(0);
 4861   format %{ %}
 4862   interface(CONST_INTER);
 4863 %}
 4864 
 4865 operand immI_4()  // Int constant 4
 4866 %{
 4867   predicate(n->get_int() == 4);
 4868   match(ConI);
 4869 
 4870   op_cost(0);
 4871   format %{ %}
 4872   interface(CONST_INTER);
 4873 %}
 4874 
 4875 operand immI_8()  // Int constant 8
 4876 %{
 4877   predicate(n->get_int() == 8);
 4878   match(ConI);
 4879 
 4880   op_cost(0);
 4881   format %{ %}
 4882   interface(CONST_INTER);
 4883 %}
 4884 
 4885 // Valid scale values for addressing modes: int constant in [0, 3]
 4886 operand immI2()
 4887 %{
 4888   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4889   match(ConI);
 4890 
 4891   format %{ %}
 4892   interface(CONST_INTER);
 4893 %}
 4894 
 4895 operand immU7()  // Int constant in [0, 0x7F]
 4896 %{
 4897   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4898   match(ConI);
 4899 
 4900   op_cost(5);
 4901   format %{ %}
 4902   interface(CONST_INTER);
 4903 %}
 4904 
 4905 operand immI8()  // Int constant in [-0x80, 0x7F]
 4906 %{
 4907   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4908   match(ConI);
 4909 
 4910   op_cost(5);
 4911   format %{ %}
 4912   interface(CONST_INTER);
 4913 %}
 4914 
 4915 operand immU8()  // Int constant in [0, 255]
 4916 %{
 4917   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4918   match(ConI);
 4919 
 4920   op_cost(5);
 4921   format %{ %}
 4922   interface(CONST_INTER);
 4923 %}
 4924 
 4925 operand immI16()  // Int constant in [-32768, 32767]
 4926 %{
 4927   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4928   match(ConI);
 4929 
 4930   op_cost(10);
 4931   format %{ %}
 4932   interface(CONST_INTER);
 4933 %}
 4934 
 4935 // Int Immediate non-negative: int constant >= 0
 4936 operand immU31()
 4937 %{
 4938   predicate(n->get_int() >= 0);
 4939   match(ConI);
 4940 
 4941   op_cost(0);
 4942   format %{ %}
 4943   interface(CONST_INTER);
 4944 %}
 4945 
 4946 // Pointer Immediate: any pointer constant (ConP)
 4947 operand immP()
 4948 %{
 4949   match(ConP);
 4950 
 4951   op_cost(10);
 4952   format %{ %}
 4953   interface(CONST_INTER);
 4954 %}
 4955 
 4956 // Null Pointer Immediate: ConP whose get_ptr() is 0
 4957 operand immP0()
 4958 %{
 4959   predicate(n->get_ptr() == 0);
 4960   match(ConP);
 4961 
 4962   op_cost(5);
 4963   format %{ %}
 4964   interface(CONST_INTER);
 4965 %}
 4966 
 4967 // Narrow Pointer Immediate: any ConN constant
 4968 operand immN() %{
 4969   match(ConN);
 4970 
 4971   op_cost(10);
 4972   format %{ %}
 4973   interface(CONST_INTER);
 4974 %}
 4975 
 4976 operand immNKlass() %{  // Immediate matching any ConNKlass constant
 4977   match(ConNKlass);
 4978 
 4979   op_cost(10);
 4980   format %{ %}
 4981   interface(CONST_INTER);
 4982 %}
 4983 
 4984 // Narrow Null Pointer Immediate: ConN whose get_narrowcon() is 0
 4985 operand immN0() %{
 4986   predicate(n->get_narrowcon() == 0);
 4987   match(ConN);
 4988 
 4989   op_cost(5);
 4990   format %{ %}
 4991   interface(CONST_INTER);
 4992 %}
 4993 
 4994 operand immP31()  // ConP that carries no relocation and whose value shifted right by 31 is 0
 4995 %{
 4996   predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
 4997             && (n->get_ptr() >> 31) == 0);
 4998   match(ConP);
 4999 
 5000   op_cost(5);
 5001   format %{ %}
 5002   interface(CONST_INTER);
 5003 %}
 5004 
 5005 
 5006 // Long Immediate: any long constant (ConL)
 5007 operand immL()
 5008 %{
 5009   match(ConL);
 5010 
 5011   op_cost(20);
 5012   format %{ %}
 5013   interface(CONST_INTER);
 5014 %}
 5015 
 5016 // Long Immediate 8-bit: long constant in [-0x80, 0x7F]
 5017 operand immL8()
 5018 %{
 5019   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 5020   match(ConL);
 5021 
 5022   op_cost(5);
 5023   format %{ %}
 5024   interface(CONST_INTER);
 5025 %}
 5026 
 5027 // Long Immediate 32-bit unsigned: long constant that survives a round trip through (unsigned int)
 5028 operand immUL32()
 5029 %{
 5030   predicate(n->get_long() == (unsigned int) (n->get_long()));
 5031   match(ConL);
 5032 
 5033   op_cost(10);
 5034   format %{ %}
 5035   interface(CONST_INTER);
 5036 %}
 5037 
 5038 // Long Immediate 32-bit signed
 5039 operand immL32()
 5040 %{
 5041   predicate(n->get_long() == (int) (n->get_long()));
 5042   match(ConL);
 5043 
 5044   op_cost(15);
 5045   format %{ %}
 5046   interface(CONST_INTER);
 5047 %}
 5048 
 5049 operand immL_Pow2()
 5050 %{
 5051   predicate(is_power_of_2((julong)n->get_long()));
 5052   match(ConL);
 5053 
 5054   op_cost(15);
 5055   format %{ %}
 5056   interface(CONST_INTER);
 5057 %}
 5058 
 5059 operand immL_NotPow2()
 5060 %{
 5061   predicate(is_power_of_2((julong)~n->get_long()));
 5062   match(ConL);
 5063 
 5064   op_cost(15);
 5065   format %{ %}
 5066   interface(CONST_INTER);
 5067 %}
 5068 
 5069 // Long Immediate zero
 5070 operand immL0()
 5071 %{
 5072   predicate(n->get_long() == 0L);
 5073   match(ConL);
 5074 
 5075   op_cost(10);
 5076   format %{ %}
 5077   interface(CONST_INTER);
 5078 %}
 5079 
 5080 // Constant for increment
 5081 operand immL1()
 5082 %{
 5083   predicate(n->get_long() == 1);
 5084   match(ConL);
 5085 
 5086   format %{ %}
 5087   interface(CONST_INTER);
 5088 %}
 5089 
 5090 // Constant for decrement
 5091 operand immL_M1()
 5092 %{
 5093   predicate(n->get_long() == -1);
 5094   match(ConL);
 5095 
 5096   format %{ %}
 5097   interface(CONST_INTER);
 5098 %}
 5099 
 5100 // Long Immediate: low 32-bit mask
 5101 operand immL_32bits()
 5102 %{
 5103   predicate(n->get_long() == 0xFFFFFFFFL);
 5104   match(ConL);
 5105   op_cost(20);
 5106 
 5107   format %{ %}
 5108   interface(CONST_INTER);
 5109 %}
 5110 
 5111 // Int Immediate: 2^n-1, positive
 5112 operand immI_Pow2M1()
 5113 %{
 5114   predicate((n->get_int() > 0)
 5115             && is_power_of_2((juint)n->get_int() + 1));
 5116   match(ConI);
 5117 
 5118   op_cost(20);
 5119   format %{ %}
 5120   interface(CONST_INTER);
 5121 %}
 5122 
 5123 // Float Immediate zero
 5124 operand immF0()
 5125 %{
 5126   predicate(jint_cast(n->getf()) == 0);
 5127   match(ConF);
 5128 
 5129   op_cost(5);
 5130   format %{ %}
 5131   interface(CONST_INTER);
 5132 %}
 5133 
 5134 // Float Immediate
 5135 operand immF()
 5136 %{
 5137   match(ConF);
 5138 
 5139   op_cost(15);
 5140   format %{ %}
 5141   interface(CONST_INTER);
 5142 %}
 5143 
 5144 // Half Float Immediate
 5145 operand immH()
 5146 %{
 5147   match(ConH);
 5148 
 5149   op_cost(15);
 5150   format %{ %}
 5151   interface(CONST_INTER);
 5152 %}
 5153 
 5154 // Double Immediate zero
 5155 operand immD0()
 5156 %{
 5157   predicate(jlong_cast(n->getd()) == 0);
 5158   match(ConD);
 5159 
 5160   op_cost(5);
 5161   format %{ %}
 5162   interface(CONST_INTER);
 5163 %}
 5164 
 5165 // Double Immediate
 5166 operand immD()
 5167 %{
 5168   match(ConD);
 5169 
 5170   op_cost(15);
 5171   format %{ %}
 5172   interface(CONST_INTER);
 5173 %}
 5174 
 5175 // Immediates for special shifts (sign extend)
 5176 
 5177 // Constants for increment
 5178 operand immI_16()
 5179 %{
 5180   predicate(n->get_int() == 16);
 5181   match(ConI);
 5182 
 5183   format %{ %}
 5184   interface(CONST_INTER);
 5185 %}
 5186 
 5187 operand immI_24()
 5188 %{
 5189   predicate(n->get_int() == 24);
 5190   match(ConI);
 5191 
 5192   format %{ %}
 5193   interface(CONST_INTER);
 5194 %}
 5195 
 5196 // Constant for byte-wide masking
 5197 operand immI_255()
 5198 %{
 5199   predicate(n->get_int() == 255);
 5200   match(ConI);
 5201 
 5202   format %{ %}
 5203   interface(CONST_INTER);
 5204 %}
 5205 
 5206 // Constant for short-wide masking
 5207 operand immI_65535()
 5208 %{
 5209   predicate(n->get_int() == 65535);
 5210   match(ConI);
 5211 
 5212   format %{ %}
 5213   interface(CONST_INTER);
 5214 %}
 5215 
 5216 // Constant for byte-wide masking
 5217 operand immL_255()
 5218 %{
 5219   predicate(n->get_long() == 255);
 5220   match(ConL);
 5221 
 5222   format %{ %}
 5223   interface(CONST_INTER);
 5224 %}
 5225 
 5226 // Constant for short-wide masking
 5227 operand immL_65535()
 5228 %{
 5229   predicate(n->get_long() == 65535);
 5230   match(ConL);
 5231 
 5232   format %{ %}
 5233   interface(CONST_INTER);
 5234 %}
 5235 
 5236 // AOT Runtime Constants Address
 5237 operand immAOTRuntimeConstantsAddress()
 5238 %{
 5239   // Check if the address is in the range of AOT Runtime Constants
 5240   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5241   match(ConP);
 5242 
 5243   op_cost(0);
 5244   format %{ %}
 5245   interface(CONST_INTER);
 5246 %}
 5247 
 5248 operand kReg()
 5249 %{
 5250   constraint(ALLOC_IN_RC(vectmask_reg));
 5251   match(RegVectMask);
 5252   format %{%}
 5253   interface(REG_INTER);
 5254 %}
 5255 
 5256 // Register Operands
 5257 // Integer Register
 5258 operand rRegI()
 5259 %{
 5260   constraint(ALLOC_IN_RC(int_reg));
 5261   match(RegI);
 5262 
 5263   match(rax_RegI);
 5264   match(rbx_RegI);
 5265   match(rcx_RegI);
 5266   match(rdx_RegI);
 5267   match(rdi_RegI);
 5268 
 5269   format %{ %}
 5270   interface(REG_INTER);
 5271 %}
 5272 
 5273 // Special Registers
 5274 operand rax_RegI()
 5275 %{
 5276   constraint(ALLOC_IN_RC(int_rax_reg));
 5277   match(RegI);
 5278   match(rRegI);
 5279 
 5280   format %{ "RAX" %}
 5281   interface(REG_INTER);
 5282 %}
 5283 
 5284 // Special Registers
 5285 operand rbx_RegI()
 5286 %{
 5287   constraint(ALLOC_IN_RC(int_rbx_reg));
 5288   match(RegI);
 5289   match(rRegI);
 5290 
 5291   format %{ "RBX" %}
 5292   interface(REG_INTER);
 5293 %}
 5294 
 5295 operand rcx_RegI()
 5296 %{
 5297   constraint(ALLOC_IN_RC(int_rcx_reg));
 5298   match(RegI);
 5299   match(rRegI);
 5300 
 5301   format %{ "RCX" %}
 5302   interface(REG_INTER);
 5303 %}
 5304 
 5305 operand rdx_RegI()
 5306 %{
 5307   constraint(ALLOC_IN_RC(int_rdx_reg));
 5308   match(RegI);
 5309   match(rRegI);
 5310 
 5311   format %{ "RDX" %}
 5312   interface(REG_INTER);
 5313 %}
 5314 
 5315 operand rdi_RegI()
 5316 %{
 5317   constraint(ALLOC_IN_RC(int_rdi_reg));
 5318   match(RegI);
 5319   match(rRegI);
 5320 
 5321   format %{ "RDI" %}
 5322   interface(REG_INTER);
 5323 %}
 5324 
 5325 operand no_rax_rdx_RegI()
 5326 %{
 5327   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5328   match(RegI);
 5329   match(rbx_RegI);
 5330   match(rcx_RegI);
 5331   match(rdi_RegI);
 5332 
 5333   format %{ %}
 5334   interface(REG_INTER);
 5335 %}
 5336 
 5337 operand no_rbp_r13_RegI()
 5338 %{
 5339   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5340   match(RegI);
 5341   match(rRegI);
 5342   match(rax_RegI);
 5343   match(rbx_RegI);
 5344   match(rcx_RegI);
 5345   match(rdx_RegI);
 5346   match(rdi_RegI);
 5347 
 5348   format %{ %}
 5349   interface(REG_INTER);
 5350 %}
 5351 
 5352 // Pointer Register
 5353 operand any_RegP()
 5354 %{
 5355   constraint(ALLOC_IN_RC(any_reg));
 5356   match(RegP);
 5357   match(rax_RegP);
 5358   match(rbx_RegP);
 5359   match(rdi_RegP);
 5360   match(rsi_RegP);
 5361   match(rbp_RegP);
 5362   match(r15_RegP);
 5363   match(rRegP);
 5364 
 5365   format %{ %}
 5366   interface(REG_INTER);
 5367 %}
 5368 
 5369 operand rRegP()
 5370 %{
 5371   constraint(ALLOC_IN_RC(ptr_reg));
 5372   match(RegP);
 5373   match(rax_RegP);
 5374   match(rbx_RegP);
 5375   match(rdi_RegP);
 5376   match(rsi_RegP);
 5377   match(rbp_RegP);  // See Q&A below about
 5378   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5379 
 5380   format %{ %}
 5381   interface(REG_INTER);
 5382 %}
 5383 
 5384 operand rRegN() %{
 5385   constraint(ALLOC_IN_RC(int_reg));
 5386   match(RegN);
 5387 
 5388   format %{ %}
 5389   interface(REG_INTER);
 5390 %}
 5391 
 5392 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5393 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5394 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 5395 // The output of an instruction is controlled by the allocator, which respects
 5396 // register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator as an output.
 5399 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 5400 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5401 // result, RBP is not included in the output of the instruction either.
 5402 
 5403 // This operand is not allowed to use RBP even if
 5404 // RBP is not used to hold the frame pointer.
 5405 operand no_rbp_RegP()
 5406 %{
 5407   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5408   match(RegP);
 5409   match(rbx_RegP);
 5410   match(rsi_RegP);
 5411   match(rdi_RegP);
 5412 
 5413   format %{ %}
 5414   interface(REG_INTER);
 5415 %}
 5416 
 5417 // Special Registers
 5418 // Return a pointer value
 5419 operand rax_RegP()
 5420 %{
 5421   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5422   match(RegP);
 5423   match(rRegP);
 5424 
 5425   format %{ %}
 5426   interface(REG_INTER);
 5427 %}
 5428 
 5429 // Special Registers
 5430 // Return a compressed pointer value
 5431 operand rax_RegN()
 5432 %{
 5433   constraint(ALLOC_IN_RC(int_rax_reg));
 5434   match(RegN);
 5435   match(rRegN);
 5436 
 5437   format %{ %}
 5438   interface(REG_INTER);
 5439 %}
 5440 
 5441 // Used in AtomicAdd
 5442 operand rbx_RegP()
 5443 %{
 5444   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5445   match(RegP);
 5446   match(rRegP);
 5447 
 5448   format %{ %}
 5449   interface(REG_INTER);
 5450 %}
 5451 
 5452 operand rsi_RegP()
 5453 %{
 5454   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5455   match(RegP);
 5456   match(rRegP);
 5457 
 5458   format %{ %}
 5459   interface(REG_INTER);
 5460 %}
 5461 
 5462 operand rbp_RegP()
 5463 %{
 5464   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5465   match(RegP);
 5466   match(rRegP);
 5467 
 5468   format %{ %}
 5469   interface(REG_INTER);
 5470 %}
 5471 
 5472 // Used in rep stosq
 5473 operand rdi_RegP()
 5474 %{
 5475   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5476   match(RegP);
 5477   match(rRegP);
 5478 
 5479   format %{ %}
 5480   interface(REG_INTER);
 5481 %}
 5482 
 5483 operand r15_RegP()
 5484 %{
 5485   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5486   match(RegP);
 5487   match(rRegP);
 5488 
 5489   format %{ %}
 5490   interface(REG_INTER);
 5491 %}
 5492 
 5493 operand rRegL()
 5494 %{
 5495   constraint(ALLOC_IN_RC(long_reg));
 5496   match(RegL);
 5497   match(rax_RegL);
 5498   match(rdx_RegL);
 5499 
 5500   format %{ %}
 5501   interface(REG_INTER);
 5502 %}
 5503 
 5504 // Special Registers
 5505 operand no_rax_rdx_RegL()
 5506 %{
 5507   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5508   match(RegL);
 5509   match(rRegL);
 5510 
 5511   format %{ %}
 5512   interface(REG_INTER);
 5513 %}
 5514 
 5515 operand rax_RegL()
 5516 %{
 5517   constraint(ALLOC_IN_RC(long_rax_reg));
 5518   match(RegL);
 5519   match(rRegL);
 5520 
 5521   format %{ "RAX" %}
 5522   interface(REG_INTER);
 5523 %}
 5524 
 5525 operand rcx_RegL()
 5526 %{
 5527   constraint(ALLOC_IN_RC(long_rcx_reg));
 5528   match(RegL);
 5529   match(rRegL);
 5530 
 5531   format %{ %}
 5532   interface(REG_INTER);
 5533 %}
 5534 
 5535 operand rdx_RegL()
 5536 %{
 5537   constraint(ALLOC_IN_RC(long_rdx_reg));
 5538   match(RegL);
 5539   match(rRegL);
 5540 
 5541   format %{ %}
 5542   interface(REG_INTER);
 5543 %}
 5544 
 5545 operand r11_RegL()
 5546 %{
 5547   constraint(ALLOC_IN_RC(long_r11_reg));
 5548   match(RegL);
 5549   match(rRegL);
 5550 
 5551   format %{ %}
 5552   interface(REG_INTER);
 5553 %}
 5554 
 5555 operand no_rbp_r13_RegL()
 5556 %{
 5557   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5558   match(RegL);
 5559   match(rRegL);
 5560   match(rax_RegL);
 5561   match(rcx_RegL);
 5562   match(rdx_RegL);
 5563 
 5564   format %{ %}
 5565   interface(REG_INTER);
 5566 %}
 5567 
 5568 // Flags register, used as output of compare instructions
 5569 operand rFlagsReg()
 5570 %{
 5571   constraint(ALLOC_IN_RC(int_flags));
 5572   match(RegFlags);
 5573 
 5574   format %{ "RFLAGS" %}
 5575   interface(REG_INTER);
 5576 %}
 5577 
 5578 // Flags register, used as output of FLOATING POINT compare instructions
 5579 operand rFlagsRegU()
 5580 %{
 5581   constraint(ALLOC_IN_RC(int_flags));
 5582   match(RegFlags);
 5583 
 5584   format %{ "RFLAGS_U" %}
 5585   interface(REG_INTER);
 5586 %}
 5587 
 5588 operand rFlagsRegUCF() %{
 5589   constraint(ALLOC_IN_RC(int_flags));
 5590   match(RegFlags);
 5591   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5592 
 5593   format %{ "RFLAGS_U_CF" %}
 5594   interface(REG_INTER);
 5595 %}
 5596 
 5597 operand rFlagsRegUCFE() %{
 5598   constraint(ALLOC_IN_RC(int_flags));
 5599   match(RegFlags);
 5600   predicate(UseAPX && VM_Version::supports_avx10_2());
 5601 
 5602   format %{ "RFLAGS_U_CFE" %}
 5603   interface(REG_INTER);
 5604 %}
 5605 
 5606 // Float register operands
 5607 operand regF() %{
 5608    constraint(ALLOC_IN_RC(float_reg));
 5609    match(RegF);
 5610 
 5611    format %{ %}
 5612    interface(REG_INTER);
 5613 %}
 5614 
 5615 // Float register operands
 5616 operand legRegF() %{
 5617    constraint(ALLOC_IN_RC(float_reg_legacy));
 5618    match(RegF);
 5619 
 5620    format %{ %}
 5621    interface(REG_INTER);
 5622 %}
 5623 
 5624 // Float register operands
 5625 operand vlRegF() %{
 5626    constraint(ALLOC_IN_RC(float_reg_vl));
 5627    match(RegF);
 5628 
 5629    format %{ %}
 5630    interface(REG_INTER);
 5631 %}
 5632 
 5633 // Double register operands
 5634 operand regD() %{
 5635    constraint(ALLOC_IN_RC(double_reg));
 5636    match(RegD);
 5637 
 5638    format %{ %}
 5639    interface(REG_INTER);
 5640 %}
 5641 
 5642 // Double register operands
 5643 operand legRegD() %{
 5644    constraint(ALLOC_IN_RC(double_reg_legacy));
 5645    match(RegD);
 5646 
 5647    format %{ %}
 5648    interface(REG_INTER);
 5649 %}
 5650 
 5651 // Double register operands
 5652 operand vlRegD() %{
 5653    constraint(ALLOC_IN_RC(double_reg_vl));
 5654    match(RegD);
 5655 
 5656    format %{ %}
 5657    interface(REG_INTER);
 5658 %}
 5659 
 5660 //----------Memory Operands----------------------------------------------------
 5661 // Direct Memory Operand
 5662 // operand direct(immP addr)
 5663 // %{
 5664 //   match(addr);
 5665 
 5666 //   format %{ "[$addr]" %}
 5667 //   interface(MEMORY_INTER) %{
 5668 //     base(0xFFFFFFFF);
 5669 //     index(0x4);
 5670 //     scale(0x0);
 5671 //     disp($addr);
 5672 //   %}
 5673 // %}
 5674 
 5675 // Indirect Memory Operand
 5676 operand indirect(any_RegP reg)
 5677 %{
 5678   constraint(ALLOC_IN_RC(ptr_reg));
 5679   match(reg);
 5680 
 5681   format %{ "[$reg]" %}
 5682   interface(MEMORY_INTER) %{
 5683     base($reg);
 5684     index(0x4);
 5685     scale(0x0);
 5686     disp(0x0);
 5687   %}
 5688 %}
 5689 
 5690 // Indirect Memory Plus Short Offset Operand
 5691 operand indOffset8(any_RegP reg, immL8 off)
 5692 %{
 5693   constraint(ALLOC_IN_RC(ptr_reg));
 5694   match(AddP reg off);
 5695 
 5696   format %{ "[$reg + $off (8-bit)]" %}
 5697   interface(MEMORY_INTER) %{
 5698     base($reg);
 5699     index(0x4);
 5700     scale(0x0);
 5701     disp($off);
 5702   %}
 5703 %}
 5704 
 5705 // Indirect Memory Plus Long Offset Operand
 5706 operand indOffset32(any_RegP reg, immL32 off)
 5707 %{
 5708   constraint(ALLOC_IN_RC(ptr_reg));
 5709   match(AddP reg off);
 5710 
 5711   format %{ "[$reg + $off (32-bit)]" %}
 5712   interface(MEMORY_INTER) %{
 5713     base($reg);
 5714     index(0x4);
 5715     scale(0x0);
 5716     disp($off);
 5717   %}
 5718 %}
 5719 
 5720 // Indirect Memory Plus Index Register Plus Offset Operand
 5721 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5722 %{
 5723   constraint(ALLOC_IN_RC(ptr_reg));
 5724   match(AddP (AddP reg lreg) off);
 5725 
 5726   op_cost(10);
 5727   format %{"[$reg + $off + $lreg]" %}
 5728   interface(MEMORY_INTER) %{
 5729     base($reg);
 5730     index($lreg);
 5731     scale(0x0);
 5732     disp($off);
 5733   %}
 5734 %}
 5735 
 5736 // Indirect Memory Plus Index Register Plus Offset Operand
 5737 operand indIndex(any_RegP reg, rRegL lreg)
 5738 %{
 5739   constraint(ALLOC_IN_RC(ptr_reg));
 5740   match(AddP reg lreg);
 5741 
 5742   op_cost(10);
 5743   format %{"[$reg + $lreg]" %}
 5744   interface(MEMORY_INTER) %{
 5745     base($reg);
 5746     index($lreg);
 5747     scale(0x0);
 5748     disp(0x0);
 5749   %}
 5750 %}
 5751 
 5752 // Indirect Memory Times Scale Plus Index Register
 5753 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5754 %{
 5755   constraint(ALLOC_IN_RC(ptr_reg));
 5756   match(AddP reg (LShiftL lreg scale));
 5757 
 5758   op_cost(10);
 5759   format %{"[$reg + $lreg << $scale]" %}
 5760   interface(MEMORY_INTER) %{
 5761     base($reg);
 5762     index($lreg);
 5763     scale($scale);
 5764     disp(0x0);
 5765   %}
 5766 %}
 5767 
 5768 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5769 %{
 5770   constraint(ALLOC_IN_RC(ptr_reg));
 5771   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5772   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5773 
 5774   op_cost(10);
 5775   format %{"[$reg + pos $idx << $scale]" %}
 5776   interface(MEMORY_INTER) %{
 5777     base($reg);
 5778     index($idx);
 5779     scale($scale);
 5780     disp(0x0);
 5781   %}
 5782 %}
 5783 
 5784 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5785 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5786 %{
 5787   constraint(ALLOC_IN_RC(ptr_reg));
 5788   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5789 
 5790   op_cost(10);
 5791   format %{"[$reg + $off + $lreg << $scale]" %}
 5792   interface(MEMORY_INTER) %{
 5793     base($reg);
 5794     index($lreg);
 5795     scale($scale);
 5796     disp($off);
 5797   %}
 5798 %}
 5799 
 5800 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5801 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5802 %{
 5803   constraint(ALLOC_IN_RC(ptr_reg));
 5804   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5805   match(AddP (AddP reg (ConvI2L idx)) off);
 5806 
 5807   op_cost(10);
 5808   format %{"[$reg + $off + $idx]" %}
 5809   interface(MEMORY_INTER) %{
 5810     base($reg);
 5811     index($idx);
 5812     scale(0x0);
 5813     disp($off);
 5814   %}
 5815 %}
 5816 
 5817 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5818 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5819 %{
 5820   constraint(ALLOC_IN_RC(ptr_reg));
 5821   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5822   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5823 
 5824   op_cost(10);
 5825   format %{"[$reg + $off + $idx << $scale]" %}
 5826   interface(MEMORY_INTER) %{
 5827     base($reg);
 5828     index($idx);
 5829     scale($scale);
 5830     disp($off);
 5831   %}
 5832 %}
 5833 
 5834 // Indirect Narrow Oop Operand
 5835 operand indCompressedOop(rRegN reg) %{
 5836   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5837   constraint(ALLOC_IN_RC(ptr_reg));
 5838   match(DecodeN reg);
 5839 
 5840   op_cost(10);
 5841   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5842   interface(MEMORY_INTER) %{
 5843     base(0xc); // R12
 5844     index($reg);
 5845     scale(0x3);
 5846     disp(0x0);
 5847   %}
 5848 %}
 5849 
 5850 // Indirect Narrow Oop Plus Offset Operand
 5851 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5852 // we can't free r12 even with CompressedOops::base() == nullptr.
 5853 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5854   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5855   constraint(ALLOC_IN_RC(ptr_reg));
 5856   match(AddP (DecodeN reg) off);
 5857 
 5858   op_cost(10);
 5859   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5860   interface(MEMORY_INTER) %{
 5861     base(0xc); // R12
 5862     index($reg);
 5863     scale(0x3);
 5864     disp($off);
 5865   %}
 5866 %}
 5867 
 5868 // Indirect Memory Operand
 5869 operand indirectNarrow(rRegN reg)
 5870 %{
 5871   predicate(CompressedOops::shift() == 0);
 5872   constraint(ALLOC_IN_RC(ptr_reg));
 5873   match(DecodeN reg);
 5874 
 5875   format %{ "[$reg]" %}
 5876   interface(MEMORY_INTER) %{
 5877     base($reg);
 5878     index(0x4);
 5879     scale(0x0);
 5880     disp(0x0);
 5881   %}
 5882 %}
 5883 
 5884 // Indirect Memory Plus Short Offset Operand
 5885 operand indOffset8Narrow(rRegN reg, immL8 off)
 5886 %{
 5887   predicate(CompressedOops::shift() == 0);
 5888   constraint(ALLOC_IN_RC(ptr_reg));
 5889   match(AddP (DecodeN reg) off);
 5890 
 5891   format %{ "[$reg + $off (8-bit)]" %}
 5892   interface(MEMORY_INTER) %{
 5893     base($reg);
 5894     index(0x4);
 5895     scale(0x0);
 5896     disp($off);
 5897   %}
 5898 %}
 5899 
 5900 // Indirect Memory Plus Long Offset Operand
 5901 operand indOffset32Narrow(rRegN reg, immL32 off)
 5902 %{
 5903   predicate(CompressedOops::shift() == 0);
 5904   constraint(ALLOC_IN_RC(ptr_reg));
 5905   match(AddP (DecodeN reg) off);
 5906 
 5907   format %{ "[$reg + $off (32-bit)]" %}
 5908   interface(MEMORY_INTER) %{
 5909     base($reg);
 5910     index(0x4);
 5911     scale(0x0);
 5912     disp($off);
 5913   %}
 5914 %}
 5915 
 5916 // Indirect Memory Plus Index Register Plus Offset Operand
 5917 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5918 %{
 5919   predicate(CompressedOops::shift() == 0);
 5920   constraint(ALLOC_IN_RC(ptr_reg));
 5921   match(AddP (AddP (DecodeN reg) lreg) off);
 5922 
 5923   op_cost(10);
 5924   format %{"[$reg + $off + $lreg]" %}
 5925   interface(MEMORY_INTER) %{
 5926     base($reg);
 5927     index($lreg);
 5928     scale(0x0);
 5929     disp($off);
 5930   %}
 5931 %}
 5932 
 5933 // Indirect Memory Plus Index Register Plus Offset Operand
 5934 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5935 %{
 5936   predicate(CompressedOops::shift() == 0);
 5937   constraint(ALLOC_IN_RC(ptr_reg));
 5938   match(AddP (DecodeN reg) lreg);
 5939 
 5940   op_cost(10);
 5941   format %{"[$reg + $lreg]" %}
 5942   interface(MEMORY_INTER) %{
 5943     base($reg);
 5944     index($lreg);
 5945     scale(0x0);
 5946     disp(0x0);
 5947   %}
 5948 %}
 5949 
 5950 // Indirect Memory Times Scale Plus Index Register
 5951 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5952 %{
 5953   predicate(CompressedOops::shift() == 0);
 5954   constraint(ALLOC_IN_RC(ptr_reg));
 5955   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5956 
 5957   op_cost(10);
 5958   format %{"[$reg + $lreg << $scale]" %}
 5959   interface(MEMORY_INTER) %{
 5960     base($reg);
 5961     index($lreg);
 5962     scale($scale);
 5963     disp(0x0);
 5964   %}
 5965 %}
 5966 
 5967 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5968 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5969 %{
 5970   predicate(CompressedOops::shift() == 0);
 5971   constraint(ALLOC_IN_RC(ptr_reg));
 5972   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5973 
 5974   op_cost(10);
 5975   format %{"[$reg + $off + $lreg << $scale]" %}
 5976   interface(MEMORY_INTER) %{
 5977     base($reg);
 5978     index($lreg);
 5979     scale($scale);
 5980     disp($off);
 5981   %}
 5982 %}
 5983 
 5984 // Indirect Memory Times Plus Positive Index Register Plus Offset Operand
 5985 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5986 %{
 5987   constraint(ALLOC_IN_RC(ptr_reg));
 5988   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5989   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5990 
 5991   op_cost(10);
 5992   format %{"[$reg + $off + $idx]" %}
 5993   interface(MEMORY_INTER) %{
 5994     base($reg);
 5995     index($idx);
 5996     scale(0x0);
 5997     disp($off);
 5998   %}
 5999 %}
 6000 
 6001 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 6002 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 6003 %{
 6004   constraint(ALLOC_IN_RC(ptr_reg));
 6005   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 6006   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 6007 
 6008   op_cost(10);
 6009   format %{"[$reg + $off + $idx << $scale]" %}
 6010   interface(MEMORY_INTER) %{
 6011     base($reg);
 6012     index($idx);
 6013     scale($scale);
 6014     disp($off);
 6015   %}
 6016 %}
 6017 
 6018 //----------Special Memory Operands--------------------------------------------
 6019 // Stack Slot Operand - This operand is used for loading and storing temporary
 6020 //                      values on the stack where a match requires a value to
 6021 //                      flow through memory.
 6022 operand stackSlotP(sRegP reg)
 6023 %{
 6024   constraint(ALLOC_IN_RC(stack_slots));
 6025   // No match rule because this operand is only generated in matching
 6026 
 6027   format %{ "[$reg]" %}
 6028   interface(MEMORY_INTER) %{
 6029     base(0x4);   // RSP
 6030     index(0x4);  // No Index
 6031     scale(0x0);  // No Scale
 6032     disp($reg);  // Stack Offset
 6033   %}
 6034 %}
 6035 
 6036 operand stackSlotI(sRegI reg)
 6037 %{
 6038   constraint(ALLOC_IN_RC(stack_slots));
 6039   // No match rule because this operand is only generated in matching
 6040 
 6041   format %{ "[$reg]" %}
 6042   interface(MEMORY_INTER) %{
 6043     base(0x4);   // RSP
 6044     index(0x4);  // No Index
 6045     scale(0x0);  // No Scale
 6046     disp($reg);  // Stack Offset
 6047   %}
 6048 %}
 6049 
 6050 operand stackSlotF(sRegF reg)
 6051 %{
 6052   constraint(ALLOC_IN_RC(stack_slots));
 6053   // No match rule because this operand is only generated in matching
 6054 
 6055   format %{ "[$reg]" %}
 6056   interface(MEMORY_INTER) %{
 6057     base(0x4);   // RSP
 6058     index(0x4);  // No Index
 6059     scale(0x0);  // No Scale
 6060     disp($reg);  // Stack Offset
 6061   %}
 6062 %}
 6063 
 6064 operand stackSlotD(sRegD reg)
 6065 %{
 6066   constraint(ALLOC_IN_RC(stack_slots));
 6067   // No match rule because this operand is only generated in matching
 6068 
 6069   format %{ "[$reg]" %}
 6070   interface(MEMORY_INTER) %{
 6071     base(0x4);   // RSP
 6072     index(0x4);  // No Index
 6073     scale(0x0);  // No Scale
 6074     disp($reg);  // Stack Offset
 6075   %}
 6076 %}
 6077 operand stackSlotL(sRegL reg)
 6078 %{
 6079   constraint(ALLOC_IN_RC(stack_slots));
 6080   // No match rule because this operand is only generated in matching
 6081 
 6082   format %{ "[$reg]" %}
 6083   interface(MEMORY_INTER) %{
 6084     base(0x4);   // RSP
 6085     index(0x4);  // No Index
 6086     scale(0x0);  // No Scale
 6087     disp($reg);  // Stack Offset
 6088   %}
 6089 %}
 6090 
 6091 //----------Conditional Branch Operands----------------------------------------
 6092 // Comparison Op  - This is the operation of the comparison, and is limited to
 6093 //                  the following set of codes:
 6094 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6095 //
 6096 // Other attributes of the comparison, such as unsignedness, are specified
 6097 // by the comparison instruction that sets a condition code flags register.
 6098 // That result is represented by a flags operand whose subtype is appropriate
 6099 // to the unsignedness (etc.) of the comparison.
 6100 //
 6101 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6102 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6103 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 6104 
 6105 // Comparison Code
 6106 operand cmpOp()
 6107 %{
 6108   match(Bool);
 6109 
 6110   format %{ "" %}
 6111   interface(COND_INTER) %{
 6112     equal(0x4, "e");
 6113     not_equal(0x5, "ne");
 6114     less(0xc, "l");
 6115     greater_equal(0xd, "ge");
 6116     less_equal(0xe, "le");
 6117     greater(0xf, "g");
 6118     overflow(0x0, "o");
 6119     no_overflow(0x1, "no");
 6120   %}
 6121 %}
 6122 
 6123 // Comparison Code, unsigned compare.  Used by FP also, with
 6124 // C2 (unordered) turned into GT or LT already.  The other bits
 6125 // C0 and C3 are turned into Carry & Zero flags.
 6126 operand cmpOpU()
 6127 %{
 6128   match(Bool);
 6129 
 6130   format %{ "" %}
 6131   interface(COND_INTER) %{
 6132     equal(0x4, "e");
 6133     not_equal(0x5, "ne");
 6134     less(0x2, "b");
 6135     greater_equal(0x3, "ae");
 6136     less_equal(0x6, "be");
 6137     greater(0x7, "a");
 6138     overflow(0x0, "o");
 6139     no_overflow(0x1, "no");
 6140   %}
 6141 %}
 6142 
 6143 
 6144 // Floating comparisons that don't require any fixup for the unordered case,
 6145 // If both inputs of the comparison are the same, ZF is always set so we
 6146 // don't need to use cmpOpUCF2 for eq/ne
 6147 operand cmpOpUCF() %{
 6148   match(Bool);
 6149   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6150             (n->as_Bool()->_test._test == BoolTest::lt ||
 6151              n->as_Bool()->_test._test == BoolTest::ge ||
 6152              n->as_Bool()->_test._test == BoolTest::le ||
 6153              n->as_Bool()->_test._test == BoolTest::gt ||
 6154              n->in(1)->in(1) == n->in(1)->in(2)));
 6155   format %{ "" %}
 6156   interface(COND_INTER) %{
 6157     equal(0xb, "np");
 6158     not_equal(0xa, "p");
 6159     less(0x2, "b");
 6160     greater_equal(0x3, "ae");
 6161     less_equal(0x6, "be");
 6162     greater(0x7, "a");
 6163     overflow(0x0, "o");
 6164     no_overflow(0x1, "no");
 6165   %}
 6166 %}
 6167 
 6168 
 6169 // Floating comparisons that can be fixed up with extra conditional jumps
 6170 operand cmpOpUCF2() %{
 6171   match(Bool);
 6172   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6173             (n->as_Bool()->_test._test == BoolTest::ne ||
 6174              n->as_Bool()->_test._test == BoolTest::eq) &&
 6175             n->in(1)->in(1) != n->in(1)->in(2));
 6176   format %{ "" %}
 6177   interface(COND_INTER) %{
 6178     equal(0x4, "e");
 6179     not_equal(0x5, "ne");
 6180     less(0x2, "b");
 6181     greater_equal(0x3, "ae");
 6182     less_equal(0x6, "be");
 6183     greater(0x7, "a");
 6184     overflow(0x0, "o");
 6185     no_overflow(0x1, "no");
 6186   %}
 6187 %}
 6188 
 6189 
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing into an
// instruction because the preceding comparison might be based on a three way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
//
// Selected only when both UseAPX and AVX10.2 support are present, and only
// for the six tests eq, ne, lt, ge, le and gt.
operand cmpOpUCFE()
%{
  match(Bool);
  predicate((UseAPX && VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq ||
             n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt));

  format %{ "" %}
  // Condition encoding / mnemonic suffix pairs; the ordered tests are mapped
  // to the unsigned suffixes (b, ae, be, a).
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
 6219 
// Operands for bound floating point register arguments.
// rxmm0: a VecX operand whose allocation is restricted to the xmm0_reg
// register class.
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
 6227 
 6228 // Vectors
 6229 
// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection pass.
// Matches every vector ideal type (VecS, VecD, VecX, VecY, VecZ) and uses the
// 'dynamic' register class as its allocation constraint.
operand vec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
 6243 
// Dummy generic legacy vector class. Should be used for all legacy vector operands.
// Replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
// Like 'vec', it matches every vector ideal type (VecS, VecD, VecX, VecY, VecZ).
operand legVec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
 6259 
// Replaces vec during post-selection cleanup. See above.
// VecS operand allocated from the vectors_reg_vlbwdq register class.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
 6268 
// Replaces legVec during post-selection cleanup. See above.
// VecS operand allocated from the vectors_reg_legacy register class.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
 6277 
// Replaces vec during post-selection cleanup. See above.
// VecD operand allocated from the vectord_reg_vlbwdq register class.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
 6286 
// Replaces legVec during post-selection cleanup. See above.
// VecD operand allocated from the vectord_reg_legacy register class.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
 6295 
// Replaces vec during post-selection cleanup. See above.
// VecX operand allocated from the vectorx_reg_vlbwdq register class.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
 6304 
// Replaces legVec during post-selection cleanup. See above.
// VecX operand allocated from the vectorx_reg_legacy register class.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
 6313 
// Replaces vec during post-selection cleanup. See above.
// VecY operand allocated from the vectory_reg_vlbwdq register class.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
 6322 
// Replaces legVec during post-selection cleanup. See above.
// VecY operand allocated from the vectory_reg_legacy register class.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
 6331 
// Replaces vec during post-selection cleanup. See above.
// VecZ operand allocated from the vectorz_reg register class (note: unlike
// the narrower vec* operands, this class name has no _vlbwdq suffix).
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6340 
// Replaces legVec during post-selection cleanup. See above.
// VecZ operand allocated from the vectorz_reg_legacy register class.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6349 
 6350 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6352 // instruction definitions by not requiring the AD writer to specify separate
 6353 // instructions for every form of operand when the instruction accepts
 6354 // multiple operand types with the same basic encoding and format.  The classic
 6355 // case of this is memory operands.
 6356 
// 'memory' groups every addressing-mode operand an instruction may accept as
// a memory reference: the plain indirect/offset/index forms, the
// compressed-oop forms, and the corresponding *Narrow variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6363 
 6364 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6366 pipeline %{
 6367 
//----------ATTRIBUTES---------------------------------------------------------
// Global properties of the instruction stream used by the pipeline model.
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction unit size is 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
 6376 
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU op, only ALU0 handles mul instructions.
// NOTE(review): the text above says 2 load/store ops per cycle, but three
// memory units (MS0, MS1, MS2) are declared below - confirm which is intended.
// DECODE, MEM and ALU are masks meaning "any one of" the listed units.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
           BR, FPU,
           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6389 
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline: six stages, S0 through S5. The pipe classes below
// refer to these stage names when stating where operands are read/written
// and where resources are used.
pipe_desc(S0, S1, S2, S3, S4, S5);
 6395 
 6396 //----------PIPELINE CLASSES---------------------------------------------------
 6397 // Pipeline Classes describe the stages in which input and output are
 6398 // referenced by the hardware pipeline.
 6399 
 6400 // Naming convention: ialu or fpu
 6401 // Then: _reg
 6402 // Then: _reg if there is a 2nd register
 6403 // Then: _long if it's a pair of instructions implementing a long
 6404 // Then: _fat if it requires the big decoder
 6405 //   Or: _mem if it requires the big decoder and a memory unit.
 6406 
// Integer ALU reg operation
// Single instruction whose only operand, dst, is read in S3 and written in S4.
pipe_class ialu_reg(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
 6416 
// Long ALU reg operation
// Modeled as two instructions operating on a long register that is read in
// S3 and written in S4.
pipe_class ialu_reg_long(rRegL dst)
%{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any decoder (x2)
    ALU    : S3(2);     // any alu (x2)
%}
 6426 
// Integer ALU reg operation using big decoder
// Same operand timing as ialu_reg, but decoding is restricted to D0.
pipe_class ialu_reg_fat(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}
 6436 
// Integer ALU reg-reg operation
// Single instruction: src is read in S3, dst is written in S4.
pipe_class ialu_reg_reg(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
 6446 
// Integer ALU reg-reg operation using big decoder
// Note: despite the reg_reg name, src is declared with the 'memory' operand
// class here; no MEM resource is reserved.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}
 6456 
// Integer ALU reg-mem operation
// Single instruction: the memory operand is read in S3 using a memory unit,
// the ALU is used one stage later (S4) and dst is written in S5.
pipe_class ialu_reg_mem(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
 6467 
// Integer mem operation (prefetch)
// Single instruction that only reads its memory operand (S3); there is no
// register result and no ALU use.
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}
 6476 
// Integer Store to Memory
// Single instruction: the address operand is read in S3, the register value
// being stored is read in S5.
pipe_class ialu_mem_reg(memory mem, rRegI src)
%{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
 6487 
 6488 // // Long Store to Memory
 6489 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6490 // %{
 6491 //     instruction_count(2);
 6492 //     mem    : S3(read);
 6493 //     src    : S5(read);
 6494 //     D0     : S0(2);          // big decoder only; twice
 6495 //     ALU    : S4(2);     // any 2 alus
 6496 //     MEM    : S3(2);  // Both mems
 6497 // %}
 6498 
// Integer Store to Memory (mem-imm form: the only declared operand is the
// memory address, there is no register source)
pipe_class ialu_mem_imm(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
 6508 
// Integer ALU0 reg-reg operation
// Like ialu_reg_reg, but restricted to the big decoder (D0) and to ALU0.
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}
 6518 
// Integer ALU0 reg-mem operation
// Like ialu_reg_mem, but the ALU stage is restricted to ALU0.
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}
 6529 
// Integer ALU reg-reg operation that produces flags
// Single instruction: both sources are read in S3 and the flags register cr
// is written in S4.
pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
 6540 
// Integer ALU reg-imm operation that produces flags
// Single instruction: src1 is read in S3 and the flags register cr is
// written in S4. No operand is declared for the immediate.
pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
 6550 
// Integer ALU reg-mem operation that produces flags
// Single instruction: the register and memory sources are read in S3 and the
// flags register cr is written in S4.
pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
 6562 
// Conditional move reg-reg
// Modeled as a 4-instruction sequence that reads p and q in S3 and y in S4.
// No write stage is declared for any operand.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
%{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}
 6572 
// Conditional move reg-reg
// Single instruction: src and the flags register are read in S3, dst is
// written in S4. No ALU resource is reserved.
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}
 6582 
// Conditional move reg-mem
// Single instruction: the memory source and the flags register are read in
// S3 (using a memory unit), dst is written in S4.
pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;        // any mem
%}
 6593 
// Conditional move reg-reg long
// Declared as a single instruction, with a decode count of 2.
pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any decoder (x2)
%}
 6603 
// Float reg operation
// Two-instruction sequence on a single register that is read in S3; no write
// stage is declared for dst.
pipe_class fpu_reg(regD dst)
%{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any decoder (x2)
    FPU    : S3;
%}
 6612 
// Float reg-reg operation
// Two-instruction sequence: src is read in S3, dst is written in S4.
pipe_class fpu_reg_reg(regD dst, regD src)
%{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any decoder (x2)
    FPU    : S3;
%}
 6622 
// Float reg-reg-reg operation
// Three-instruction sequence: both sources are read in S3, dst is written in S4.
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
%{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any decoder (x3)
    FPU    : S3(2);
%}
 6633 
// Float reg-reg-reg-reg operation
// Four-instruction sequence: the three sources are read in S3, dst is
// written in S4.
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any decoder (x4, matching instruction_count)
    FPU    : S3(2);
%}
 6645 
// Float reg-mem-reg-reg operation
// Four-instruction sequence where the first source is a memory operand: one
// use of the big decoder in S0, the remaining decode uses in S1.
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any decoder (x3)
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;        // any mem
%}
 6659 
// Float reg-mem operation
// Two-instruction sequence: the memory operand is read in S3, the FPU is used
// in S4 and dst is written in S5.
pipe_class fpu_reg_mem(regD dst, memory mem)
%{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6671 
// Float reg-reg-mem operation
// Three-instruction sequence: the register source and the memory operand are
// read in S3, the FPU is used in S4 and dst is written in S5.
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
%{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6684 
// Float mem-reg operation
// Two-instruction sequence: the address operand is read in S3, the register
// source is read in S5.
pipe_class fpu_mem_reg(memory mem, regD src)
%{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6696 
// Float mem-reg-reg operation
// Three-instruction sequence: both register sources and the memory operand
// are read in S3.
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6708 
// Float mem-reg-mem operation
// Three-instruction sequence with two memory operands: src1 and src2 are read
// in S3, mem is read in S4; the memory resource count is 2.
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
 6720 
// Float mem-mem operation
// Two-instruction sequence: src1 is read in S3 and dst in S4. No FPU resource
// is reserved.
pipe_class fpu_mem_mem(memory dst, memory src1)
%{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}
 6729 
// Float mem-mem-mem operation
// Three-instruction sequence with three memory operands: both sources are
// read in S3 and dst in S4.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}
 6740 
// Float mem-reg-constant operation
// Three-instruction sequence: src1 and mem are both read in S4. The memory
// resource count is 2 although only one memory operand is declared
// (presumably the second use is the constant load - TODO confirm).
pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
 6751 
// Float load constant
// Two-instruction sequence: a memory unit is used in S3 for the load, the FPU
// in S4, and dst is written in S5.
pipe_class fpu_reg_con(regD dst)
%{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6762 
// Float reg-reg operation with a loaded constant
// Three-instruction sequence: src is read in S3, a memory unit is used in S3
// for the load, the FPU in S4, and dst is written in S5.
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6774 
// Unconditional branch
// Single instruction that uses the branch unit in S3; no decoder resource is
// reserved and the label operand has no read/write stage.
pipe_class pipe_jmp(label labl)
%{
    single_instruction;
    BR   : S3;
%}
 6781 
// Conditional branch
// Single instruction: the flags register is read in S1 and the branch unit
// is used in S3.
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}
 6789 
// Allocation idiom
// One serializing instruction with a fixed latency of 6: heap_ptr is read in
// S3, dst is written in S5, and decoder, memory, ALU and branch resources are
// all reserved.
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}
 6803 
// Generic big/slow expanded idiom
// Catch-all class for expensive expansions: counted as 10 instructions
// spanning multiple bundles, serializing, with a fixed latency of 100.
pipe_class pipe_slow()
%{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}
 6812 
// The real do-nothing guy
// Zero instructions and no resource usage.
pipe_class empty()
%{
    instruction_count(0);
%}
 6818 
// Define the class for the Nop node: MachNop is scheduled with the 'empty'
// pipe class.
define
%{
   MachNop = empty;
%}
 6824 
 6825 %}
 6826 
 6827 //----------INSTRUCTIONS-------------------------------------------------------
 6828 //
 6829 // match      -- States which machine-independent subtree may be replaced
 6830 //               by this instruction.
 6831 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6832 //               selection to identify a minimum cost tree of machine
 6833 //               instructions that matches a tree of machine-independent
 6834 //               instructions.
 6835 // format     -- A string providing the disassembly for this instruction.
 6836 //               The value of an instruction's operand may be inserted
 6837 //               by referring to it with a '$' prefix.
 6838 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6839 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6841 //               indicate the type of machine instruction, while secondary
 6842 //               and tertiary are often used for prefix options or addressing
 6843 //               modes.
 6844 // ins_encode -- A list of encode classes with parameters. The encode class
 6845 //               name must have been defined in an 'enc_class' specification
 6846 //               in the encode section of the architecture description.
 6847 
 6848 // ============================================================================
 6849 
 6850 instruct ShouldNotReachHere() %{
 6851   match(Halt);
 6852   format %{ "stop\t# ShouldNotReachHere" %}
 6853   ins_encode %{
 6854     if (is_reachable()) {
 6855       const char* str = __ code_string(_halt_reason);
 6856       __ stop(str);
 6857     }
 6858   %}
 6859   ins_pipe(pipe_slow);
 6860 %}
 6861 
 6862 // ============================================================================
 6863 
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Dummy float move regF -> vlRegF. The encoding is ShouldNotReachHere(), so
// this node must not survive to code emission.
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6874 
// Dummy float move regF -> legRegF. The encoding is ShouldNotReachHere(), so
// this node must not survive to code emission.
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6884 
// Dummy float move vlRegF -> regF. The encoding is ShouldNotReachHere(), so
// this node must not survive to code emission.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6894 
// Dummy float move legRegF -> regF. The encoding is ShouldNotReachHere(), so
// this node must not survive to code emission.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6904 
// Dummy double move regD -> vlRegD. The encoding is ShouldNotReachHere(), so
// this node must not survive to code emission.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6914 
// Dummy double move regD -> legRegD. The encoding is ShouldNotReachHere(), so
// this node must not survive to code emission.
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6924 
// Dummy double move vlRegD -> regD. The encoding is ShouldNotReachHere(), so
// this node must not survive to code emission.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6934 
// Dummy double move legRegD -> regD. The encoding is ShouldNotReachHere(), so
// this node must not survive to code emission.
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6944 
 6945 //----------Load/Store/Move Instructions---------------------------------------
 6946 //----------Load Instructions--------------------------------------------------
 6947 
// Load Byte (8 bit signed)
// Matches LoadB and emits a sign-extending byte load (movsbl) into an int
// register.
instruct loadB(rRegI dst, memory mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "movsbl  $dst, $mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 6962 
// Load Byte (8 bit signed) into Long Register
// Folds ConvI2L(LoadB) into a single sign-extending byte load to 64 bits
// (movsbq).
instruct loadB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(125);
  format %{ "movsbq  $dst, $mem\t# byte -> long" %}

  ins_encode %{
    __ movsbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 6977 
// Load Unsigned Byte (8 bit UNsigned)
// Matches LoadUB and emits a zero-extending byte load (movzbl) into an int
// register.
instruct loadUB(rRegI dst, memory mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "movzbl  $dst, $mem\t# ubyte" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 6992 
// Load Unsigned Byte (8 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUB) into a single zero-extending byte load to 64 bits
// (movzbq).
instruct loadUB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(125);
  format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}

  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7007 
 7008 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 7009 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7010   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 7011   effect(KILL cr);
 7012 
 7013   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 7014             "andl    $dst, right_n_bits($mask, 8)" %}
 7015   ins_encode %{
 7016     Register Rdst = $dst$$Register;
 7017     __ movzbq(Rdst, $mem$$Address);
 7018     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 7019   %}
 7020   ins_pipe(ialu_reg_mem);
 7021 %}
 7022 
// Load Short (16 bit signed)
// Matches LoadS and emits a sign-extending 16-bit load (movswl) into an int
// register.
instruct loadS(rRegI dst, memory mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7037 
// Load Short (16 bit signed) to Byte (8 bit signed)
// The shift-left-by-24 / arithmetic-shift-right-by-24 pair applied to the
// loaded short keeps only its sign-extended low byte, so the whole pattern is
// replaced by one sign-extending byte load from the same address.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7049 
// Load Short (16 bit signed) into Long Register
// Folds ConvI2L(LoadS) into a single sign-extending 16-bit load to 64 bits
// (movswq).
instruct loadS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(125);
  format %{ "movswq $dst, $mem\t# short -> long" %}

  ins_encode %{
    __ movswq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7064 
// Load Unsigned Short/Char (16 bit UNsigned)
// Matches LoadUS and emits a zero-extending 16-bit load (movzwl) into an int
// register.
instruct loadUS(rRegI dst, memory mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "movzwl  $dst, $mem\t# ushort/char" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7079 
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// The shift-left-by-24 / arithmetic-shift-right-by-24 pair applied to the
// loaded value keeps only its sign-extended low byte, so the whole pattern is
// replaced by one sign-extending byte load from the same address.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7091 
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUS) into a single zero-extending 16-bit load to 64 bits
// (movzwq).
instruct loadUS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(125);
  format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}

  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7106 
// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// Masking the loaded value with 0xFF leaves only its low byte, so the pattern
// is replaced by a single zero-extending byte load (movzbq).
// No explicit ins_cost is given for this instruct.
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7117 
 7118 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7119 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7120   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7121   effect(KILL cr);
 7122 
 7123   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7124             "andl    $dst, right_n_bits($mask, 16)" %}
 7125   ins_encode %{
 7126     Register Rdst = $dst$$Register;
 7127     __ movzwq(Rdst, $mem$$Address);
 7128     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7129   %}
 7130   ins_pipe(ialu_reg_mem);
 7131 %}
 7132 
// Load Integer
// Matches LoadI and emits a plain 32-bit load (movl).
instruct loadI(rRegI dst, memory mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "movl    $dst, $mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7147 
// Load Integer (32 bit signed) to Byte (8 bit signed)
// The shift-left-by-24 / arithmetic-shift-right-by-24 pair applied to the
// loaded int keeps only its sign-extended low byte, so the whole pattern is
// replaced by one sign-extending byte load from the same address.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7159 
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// Masking the loaded int with the immI_255 constant leaves only its low byte,
// so the pattern is replaced by a single zero-extending byte load (movzbl).
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7171 
// Load Integer (32 bit signed) to Short (16 bit signed)
// The shift-left-by-16 / arithmetic-shift-right-by-16 pair applied to the
// loaded int keeps only its sign-extended low 16 bits, so the whole pattern
// is replaced by one sign-extending 16-bit load from the same address.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "movswl  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7183 
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// Masking the loaded int with the immI_65535 constant leaves only its low 16
// bits, so the pattern is replaced by a single zero-extending 16-bit load
// (movzwl).
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7195 
// Load Integer into Long Register
// Folds ConvI2L(LoadI) into a single sign-extending 32-bit load to 64 bits
// (movslq).
instruct loadI2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));

  ins_cost(125);
  format %{ "movslq  $dst, $mem\t# int -> long" %}

  ins_encode %{
    __ movslq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7210 
// Load Integer with mask 0xFF into Long Register
// Masking with 0xFF leaves only the low byte, which is non-negative, so the
// AndI + ConvI2L pattern is replaced by a single zero-extending byte load
// (movzbq). No explicit ins_cost is given for this instruct.
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7221 
// Load Integer with mask 0xFFFF into Long Register
// Masking with 0xFFFF leaves only the low 16 bits, which are non-negative, so
// the AndI + ConvI2L pattern is replaced by a single zero-extending 16-bit
// load (movzwq). No explicit ins_cost is given for this instruct.
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7232 
 7233 // Load Integer with a 31-bit mask into Long Register
 7234 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7235   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7236   effect(KILL cr);
 7237 
 7238   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7239             "andl    $dst, $mask" %}
 7240   ins_encode %{
 7241     Register Rdst = $dst$$Register;
 7242     __ movl(Rdst, $mem$$Address);
 7243     __ andl(Rdst, $mask$$constant);
 7244   %}
 7245   ins_pipe(ialu_reg_mem);
 7246 %}
 7247 
 7248 // Load Unsigned Integer into Long Register
 7249 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7250 %{
 7251   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7252 
 7253   ins_cost(125);
 7254   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7255 
 7256   ins_encode %{
 7257     __ movl($dst$$Register, $mem$$Address);
 7258   %}
 7259 
 7260   ins_pipe(ialu_reg_mem);
 7261 %}
 7262 
 7263 // Load Long
 7264 instruct loadL(rRegL dst, memory mem)
 7265 %{
 7266   match(Set dst (LoadL mem));
 7267 
 7268   ins_cost(125);
 7269   format %{ "movq    $dst, $mem\t# long" %}
 7270 
 7271   ins_encode %{
 7272     __ movq($dst$$Register, $mem$$Address);
 7273   %}
 7274 
 7275   ins_pipe(ialu_reg_mem); // XXX
 7276 %}
 7277 
 7278 // Load Range
 7279 instruct loadRange(rRegI dst, memory mem)
 7280 %{
 7281   match(Set dst (LoadRange mem));
 7282 
 7283   ins_cost(125); // XXX
 7284   format %{ "movl    $dst, $mem\t# range" %}
 7285   ins_encode %{
 7286     __ movl($dst$$Register, $mem$$Address);
 7287   %}
 7288   ins_pipe(ialu_reg_mem);
 7289 %}
 7290 
 7291 // Load Pointer
 7292 instruct loadP(rRegP dst, memory mem)
 7293 %{
 7294   match(Set dst (LoadP mem));
 7295   predicate(n->as_Load()->barrier_data() == 0);
 7296 
 7297   ins_cost(125); // XXX
 7298   format %{ "movq    $dst, $mem\t# ptr" %}
 7299   ins_encode %{
 7300     __ movq($dst$$Register, $mem$$Address);
 7301   %}
 7302   ins_pipe(ialu_reg_mem); // XXX
 7303 %}
 7304 
 7305 // Load Compressed Pointer
 7306 instruct loadN(rRegN dst, memory mem)
 7307 %{
 7308    predicate(n->as_Load()->barrier_data() == 0);
 7309    match(Set dst (LoadN mem));
 7310 
 7311    ins_cost(125); // XXX
 7312    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7313    ins_encode %{
 7314      __ movl($dst$$Register, $mem$$Address);
 7315    %}
 7316    ins_pipe(ialu_reg_mem); // XXX
 7317 %}
 7318 
 7319 
 7320 // Load Klass Pointer
 7321 instruct loadKlass(rRegP dst, memory mem)
 7322 %{
 7323   match(Set dst (LoadKlass mem));
 7324 
 7325   ins_cost(125); // XXX
 7326   format %{ "movq    $dst, $mem\t# class" %}
 7327   ins_encode %{
 7328     __ movq($dst$$Register, $mem$$Address);
 7329   %}
 7330   ins_pipe(ialu_reg_mem); // XXX
 7331 %}
 7332 
 7333 // Load narrow Klass Pointer
 7334 instruct loadNKlass(rRegN dst, memory mem)
 7335 %{
 7336   predicate(!UseCompactObjectHeaders);
 7337   match(Set dst (LoadNKlass mem));
 7338 
 7339   ins_cost(125); // XXX
 7340   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7341   ins_encode %{
 7342     __ movl($dst$$Register, $mem$$Address);
 7343   %}
 7344   ins_pipe(ialu_reg_mem); // XXX
 7345 %}
 7346 
 7347 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7348 %{
 7349   predicate(UseCompactObjectHeaders);
 7350   match(Set dst (LoadNKlass mem));
 7351   effect(KILL cr);
 7352   ins_cost(125);
 7353   format %{
 7354     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7355     "shrl    $dst, markWord::klass_shift_at_offset"
 7356   %}
 7357   ins_encode %{
 7358     __ movl($dst$$Register, $mem$$Address);
 7359     __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7360   %}
 7361   ins_pipe(ialu_reg_mem);
 7362 %}
 7363 
 7364 // Load Float
 7365 instruct loadF(regF dst, memory mem)
 7366 %{
 7367   match(Set dst (LoadF mem));
 7368 
 7369   ins_cost(145); // XXX
 7370   format %{ "movss   $dst, $mem\t# float" %}
 7371   ins_encode %{
 7372     __ movflt($dst$$XMMRegister, $mem$$Address);
 7373   %}
 7374   ins_pipe(pipe_slow); // XXX
 7375 %}
 7376 
 7377 // Load Double
 7378 instruct loadD_partial(regD dst, memory mem)
 7379 %{
 7380   predicate(!UseXmmLoadAndClearUpper);
 7381   match(Set dst (LoadD mem));
 7382 
 7383   ins_cost(145); // XXX
 7384   format %{ "movlpd  $dst, $mem\t# double" %}
 7385   ins_encode %{
 7386     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7387   %}
 7388   ins_pipe(pipe_slow); // XXX
 7389 %}
 7390 
 7391 instruct loadD(regD dst, memory mem)
 7392 %{
 7393   predicate(UseXmmLoadAndClearUpper);
 7394   match(Set dst (LoadD mem));
 7395 
 7396   ins_cost(145); // XXX
 7397   format %{ "movsd   $dst, $mem\t# double" %}
 7398   ins_encode %{
 7399     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7400   %}
 7401   ins_pipe(pipe_slow); // XXX
 7402 %}
 7403 
 7404 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7405 %{
 7406   match(Set dst con);
 7407 
 7408   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7409 
 7410   ins_encode %{
 7411     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7412   %}
 7413 
 7414   ins_pipe(ialu_reg_fat);
 7415 %}
 7416 
 7417 // min = java.lang.Math.min(float a, float b)
 7418 // max = java.lang.Math.max(float a, float b)
 7419 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
 7420 %{
 7421   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7422   match(Set dst (MaxF a b));
 7423   match(Set dst (MinF a b));
 7424 
 7425   format %{ "minmaxF $dst, $a, $b" %}
 7426   ins_encode %{
 7427     int opcode = this->ideal_Opcode();
 7428     __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7429   %}
 7430   ins_pipe( pipe_slow );
 7431 %}
 7432 
 7433 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
 7434 %{
 7435   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7436   match(Set dst (MaxF a b));
 7437   match(Set dst (MinF a b));
 7438   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7439 
 7440   format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7441   ins_encode %{
 7442     int opcode = this->ideal_Opcode();
 7443     bool min = (opcode == Op_MinF) ? true : false;
 7444     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7445                     min, fp_prec_flt /*pt*/);
 7446   %}
 7447   ins_pipe( pipe_slow );
 7448 %}
 7449 
 7450 // min = java.lang.Math.min(float a, float b)
 7451 // max = java.lang.Math.max(float a, float b)
 7452 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
 7453 %{
 7454   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7455   match(Set dst (MaxF a b));
 7456   match(Set dst (MinF a b));
 7457   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7458 
 7459   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7460   ins_encode %{
 7461     int opcode = this->ideal_Opcode();
 7462     int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
 7463     __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7464                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7465   %}
 7466   ins_pipe( pipe_slow );
 7467 %}
 7468 
 7469 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
 7470 %{
 7471   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7472   match(Set dst (MaxF a b));
 7473   match(Set dst (MinF a b));
 7474   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7475 
 7476   format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
 7477   ins_encode %{
 7478     int opcode = this->ideal_Opcode();
 7479     bool min = (opcode == Op_MinF) ? true : false;
 7480     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7481                     min, fp_prec_flt /*pt*/);
 7482   %}
 7483   ins_pipe( pipe_slow );
 7484 %}
 7485 
 7486 // min = java.lang.Math.min(double a, double b)
 7487 // max = java.lang.Math.max(double a, double b)
 7488 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
 7489 %{
 7490   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7491   match(Set dst (MaxD a b));
 7492   match(Set dst (MinD a b));
 7493 
 7494   format %{ "minmaxD $dst, $a, $b" %}
 7495   ins_encode %{
 7496     int opcode = this->ideal_Opcode();
 7497     __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7498   %}
 7499   ins_pipe( pipe_slow );
 7500 %}
 7501 
 7502 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
 7503 %{
 7504   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7505   match(Set dst (MaxD a b));
 7506   match(Set dst (MinD a b));
 7507   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7508 
 7509   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7510   ins_encode %{
 7511     int opcode = this->ideal_Opcode();
 7512     bool min = (opcode == Op_MinD) ? true : false;
 7513     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7514                     min, fp_prec_dbl /*pt*/);
 7515   %}
 7516   ins_pipe( pipe_slow );
 7517 %}
 7518 
 7519 // min = java.lang.Math.min(double a, double b)
 7520 // max = java.lang.Math.max(double a, double b)
 7521 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
 7522 %{
 7523   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7524   match(Set dst (MaxD a b));
 7525   match(Set dst (MinD a b));
 7526   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7527 
 7528   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7529   ins_encode %{
 7530     int opcode = this->ideal_Opcode();
 7531     int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
 7532     __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7533                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7534   %}
 7535   ins_pipe( pipe_slow );
 7536 %}
 7537 
 7538 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
 7539 %{
 7540   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7541   match(Set dst (MaxD a b));
 7542   match(Set dst (MinD a b));
 7543   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7544 
 7545   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7546   ins_encode %{
 7547     int opcode = this->ideal_Opcode();
 7548     bool min = (opcode == Op_MinD) ? true : false;
 7549     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7550                     min, fp_prec_dbl /*pt*/);
 7551   %}
 7552   ins_pipe( pipe_slow );
 7553 %}
 7554 
 7555 // Load Effective Address
 7556 instruct leaP8(rRegP dst, indOffset8 mem)
 7557 %{
 7558   match(Set dst mem);
 7559 
 7560   ins_cost(110); // XXX
 7561   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7562   ins_encode %{
 7563     __ leaq($dst$$Register, $mem$$Address);
 7564   %}
 7565   ins_pipe(ialu_reg_reg_fat);
 7566 %}
 7567 
 7568 instruct leaP32(rRegP dst, indOffset32 mem)
 7569 %{
 7570   match(Set dst mem);
 7571 
 7572   ins_cost(110);
 7573   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7574   ins_encode %{
 7575     __ leaq($dst$$Register, $mem$$Address);
 7576   %}
 7577   ins_pipe(ialu_reg_reg_fat);
 7578 %}
 7579 
 7580 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7581 %{
 7582   match(Set dst mem);
 7583 
 7584   ins_cost(110);
 7585   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7586   ins_encode %{
 7587     __ leaq($dst$$Register, $mem$$Address);
 7588   %}
 7589   ins_pipe(ialu_reg_reg_fat);
 7590 %}
 7591 
 7592 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7593 %{
 7594   match(Set dst mem);
 7595 
 7596   ins_cost(110);
 7597   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7598   ins_encode %{
 7599     __ leaq($dst$$Register, $mem$$Address);
 7600   %}
 7601   ins_pipe(ialu_reg_reg_fat);
 7602 %}
 7603 
 7604 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7605 %{
 7606   match(Set dst mem);
 7607 
 7608   ins_cost(110);
 7609   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7610   ins_encode %{
 7611     __ leaq($dst$$Register, $mem$$Address);
 7612   %}
 7613   ins_pipe(ialu_reg_reg_fat);
 7614 %}
 7615 
 7616 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7617 %{
 7618   match(Set dst mem);
 7619 
 7620   ins_cost(110);
 7621   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7622   ins_encode %{
 7623     __ leaq($dst$$Register, $mem$$Address);
 7624   %}
 7625   ins_pipe(ialu_reg_reg_fat);
 7626 %}
 7627 
 7628 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7629 %{
 7630   match(Set dst mem);
 7631 
 7632   ins_cost(110);
 7633   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7634   ins_encode %{
 7635     __ leaq($dst$$Register, $mem$$Address);
 7636   %}
 7637   ins_pipe(ialu_reg_reg_fat);
 7638 %}
 7639 
 7640 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7641 %{
 7642   match(Set dst mem);
 7643 
 7644   ins_cost(110);
 7645   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7646   ins_encode %{
 7647     __ leaq($dst$$Register, $mem$$Address);
 7648   %}
 7649   ins_pipe(ialu_reg_reg_fat);
 7650 %}
 7651 
 7652 // Load Effective Address which uses Narrow (32-bits) oop
 7653 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7654 %{
 7655   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7656   match(Set dst mem);
 7657 
 7658   ins_cost(110);
 7659   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7660   ins_encode %{
 7661     __ leaq($dst$$Register, $mem$$Address);
 7662   %}
 7663   ins_pipe(ialu_reg_reg_fat);
 7664 %}
 7665 
 7666 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7667 %{
 7668   predicate(CompressedOops::shift() == 0);
 7669   match(Set dst mem);
 7670 
 7671   ins_cost(110); // XXX
 7672   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7673   ins_encode %{
 7674     __ leaq($dst$$Register, $mem$$Address);
 7675   %}
 7676   ins_pipe(ialu_reg_reg_fat);
 7677 %}
 7678 
 7679 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7680 %{
 7681   predicate(CompressedOops::shift() == 0);
 7682   match(Set dst mem);
 7683 
 7684   ins_cost(110);
 7685   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7686   ins_encode %{
 7687     __ leaq($dst$$Register, $mem$$Address);
 7688   %}
 7689   ins_pipe(ialu_reg_reg_fat);
 7690 %}
 7691 
 7692 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7693 %{
 7694   predicate(CompressedOops::shift() == 0);
 7695   match(Set dst mem);
 7696 
 7697   ins_cost(110);
 7698   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7699   ins_encode %{
 7700     __ leaq($dst$$Register, $mem$$Address);
 7701   %}
 7702   ins_pipe(ialu_reg_reg_fat);
 7703 %}
 7704 
 7705 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7706 %{
 7707   predicate(CompressedOops::shift() == 0);
 7708   match(Set dst mem);
 7709 
 7710   ins_cost(110);
 7711   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7712   ins_encode %{
 7713     __ leaq($dst$$Register, $mem$$Address);
 7714   %}
 7715   ins_pipe(ialu_reg_reg_fat);
 7716 %}
 7717 
 7718 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7719 %{
 7720   predicate(CompressedOops::shift() == 0);
 7721   match(Set dst mem);
 7722 
 7723   ins_cost(110);
 7724   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7725   ins_encode %{
 7726     __ leaq($dst$$Register, $mem$$Address);
 7727   %}
 7728   ins_pipe(ialu_reg_reg_fat);
 7729 %}
 7730 
 7731 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7732 %{
 7733   predicate(CompressedOops::shift() == 0);
 7734   match(Set dst mem);
 7735 
 7736   ins_cost(110);
 7737   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7738   ins_encode %{
 7739     __ leaq($dst$$Register, $mem$$Address);
 7740   %}
 7741   ins_pipe(ialu_reg_reg_fat);
 7742 %}
 7743 
 7744 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7745 %{
 7746   predicate(CompressedOops::shift() == 0);
 7747   match(Set dst mem);
 7748 
 7749   ins_cost(110);
 7750   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7751   ins_encode %{
 7752     __ leaq($dst$$Register, $mem$$Address);
 7753   %}
 7754   ins_pipe(ialu_reg_reg_fat);
 7755 %}
 7756 
 7757 instruct loadConI(rRegI dst, immI src)
 7758 %{
 7759   match(Set dst src);
 7760 
 7761   format %{ "movl    $dst, $src\t# int" %}
 7762   ins_encode %{
 7763     __ movl($dst$$Register, $src$$constant);
 7764   %}
 7765   ins_pipe(ialu_reg_fat); // XXX
 7766 %}
 7767 
 7768 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7769 %{
 7770   match(Set dst src);
 7771   effect(KILL cr);
 7772 
 7773   ins_cost(50);
 7774   format %{ "xorl    $dst, $dst\t# int" %}
 7775   ins_encode %{
 7776     __ xorl($dst$$Register, $dst$$Register);
 7777   %}
 7778   ins_pipe(ialu_reg);
 7779 %}
 7780 
 7781 instruct loadConL(rRegL dst, immL src)
 7782 %{
 7783   match(Set dst src);
 7784 
 7785   ins_cost(150);
 7786   format %{ "movq    $dst, $src\t# long" %}
 7787   ins_encode %{
 7788     __ mov64($dst$$Register, $src$$constant);
 7789   %}
 7790   ins_pipe(ialu_reg);
 7791 %}
 7792 
 7793 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7794 %{
 7795   match(Set dst src);
 7796   effect(KILL cr);
 7797 
 7798   ins_cost(50);
 7799   format %{ "xorl    $dst, $dst\t# long" %}
 7800   ins_encode %{
 7801     __ xorl($dst$$Register, $dst$$Register);
 7802   %}
 7803   ins_pipe(ialu_reg); // XXX
 7804 %}
 7805 
 7806 instruct loadConUL32(rRegL dst, immUL32 src)
 7807 %{
 7808   match(Set dst src);
 7809 
 7810   ins_cost(60);
 7811   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7812   ins_encode %{
 7813     __ movl($dst$$Register, $src$$constant);
 7814   %}
 7815   ins_pipe(ialu_reg);
 7816 %}
 7817 
 7818 instruct loadConL32(rRegL dst, immL32 src)
 7819 %{
 7820   match(Set dst src);
 7821 
 7822   ins_cost(70);
 7823   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7824   ins_encode %{
 7825     __ movq($dst$$Register, $src$$constant);
 7826   %}
 7827   ins_pipe(ialu_reg);
 7828 %}
 7829 
 7830 instruct loadConP(rRegP dst, immP con) %{
 7831   match(Set dst con);
 7832 
 7833   format %{ "movq    $dst, $con\t# ptr" %}
 7834   ins_encode %{
 7835     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7836   %}
 7837   ins_pipe(ialu_reg_fat); // XXX
 7838 %}
 7839 
 7840 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7841 %{
 7842   match(Set dst src);
 7843   effect(KILL cr);
 7844 
 7845   ins_cost(50);
 7846   format %{ "xorl    $dst, $dst\t# ptr" %}
 7847   ins_encode %{
 7848     __ xorl($dst$$Register, $dst$$Register);
 7849   %}
 7850   ins_pipe(ialu_reg);
 7851 %}
 7852 
 7853 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7854 %{
 7855   match(Set dst src);
 7856   effect(KILL cr);
 7857 
 7858   ins_cost(60);
 7859   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7860   ins_encode %{
 7861     __ movl($dst$$Register, $src$$constant);
 7862   %}
 7863   ins_pipe(ialu_reg);
 7864 %}
 7865 
 7866 instruct loadConF(regF dst, immF con) %{
 7867   match(Set dst con);
 7868   ins_cost(125);
 7869   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7870   ins_encode %{
 7871     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7872   %}
 7873   ins_pipe(pipe_slow);
 7874 %}
 7875 
 7876 instruct loadConH(regF dst, immH con) %{
 7877   match(Set dst con);
 7878   ins_cost(125);
 7879   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7880   ins_encode %{
 7881     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7882   %}
 7883   ins_pipe(pipe_slow);
 7884 %}
 7885 
 7886 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7887   match(Set dst src);
 7888   effect(KILL cr);
 7889   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7890   ins_encode %{
 7891     __ xorq($dst$$Register, $dst$$Register);
 7892   %}
 7893   ins_pipe(ialu_reg);
 7894 %}
 7895 
 7896 instruct loadConN(rRegN dst, immN src) %{
 7897   match(Set dst src);
 7898 
 7899   ins_cost(125);
 7900   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7901   ins_encode %{
 7902     address con = (address)$src$$constant;
 7903     if (con == nullptr) {
 7904       ShouldNotReachHere();
 7905     } else {
 7906       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7907     }
 7908   %}
 7909   ins_pipe(ialu_reg_fat); // XXX
 7910 %}
 7911 
 7912 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7913   match(Set dst src);
 7914 
 7915   ins_cost(125);
 7916   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7917   ins_encode %{
 7918     address con = (address)$src$$constant;
 7919     if (con == nullptr) {
 7920       ShouldNotReachHere();
 7921     } else {
 7922       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7923     }
 7924   %}
 7925   ins_pipe(ialu_reg_fat); // XXX
 7926 %}
 7927 
 7928 instruct loadConF0(regF dst, immF0 src)
 7929 %{
 7930   match(Set dst src);
 7931   ins_cost(100);
 7932 
 7933   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7934   ins_encode %{
 7935     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7936   %}
 7937   ins_pipe(pipe_slow);
 7938 %}
 7939 
 7940 // Use the same format since predicate() can not be used here.
 7941 instruct loadConD(regD dst, immD con) %{
 7942   match(Set dst con);
 7943   ins_cost(125);
 7944   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7945   ins_encode %{
 7946     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7947   %}
 7948   ins_pipe(pipe_slow);
 7949 %}
 7950 
 7951 instruct loadConD0(regD dst, immD0 src)
 7952 %{
 7953   match(Set dst src);
 7954   ins_cost(100);
 7955 
 7956   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7957   ins_encode %{
 7958     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7959   %}
 7960   ins_pipe(pipe_slow);
 7961 %}
 7962 
 7963 instruct loadSSI(rRegI dst, stackSlotI src)
 7964 %{
 7965   match(Set dst src);
 7966 
 7967   ins_cost(125);
 7968   format %{ "movl    $dst, $src\t# int stk" %}
 7969   ins_encode %{
 7970     __ movl($dst$$Register, $src$$Address);
 7971   %}
 7972   ins_pipe(ialu_reg_mem);
 7973 %}
 7974 
 7975 instruct loadSSL(rRegL dst, stackSlotL src)
 7976 %{
 7977   match(Set dst src);
 7978 
 7979   ins_cost(125);
 7980   format %{ "movq    $dst, $src\t# long stk" %}
 7981   ins_encode %{
 7982     __ movq($dst$$Register, $src$$Address);
 7983   %}
 7984   ins_pipe(ialu_reg_mem);
 7985 %}
 7986 
 7987 instruct loadSSP(rRegP dst, stackSlotP src)
 7988 %{
 7989   match(Set dst src);
 7990 
 7991   ins_cost(125);
 7992   format %{ "movq    $dst, $src\t# ptr stk" %}
 7993   ins_encode %{
 7994     __ movq($dst$$Register, $src$$Address);
 7995   %}
 7996   ins_pipe(ialu_reg_mem);
 7997 %}
 7998 
 7999 instruct loadSSF(regF dst, stackSlotF src)
 8000 %{
 8001   match(Set dst src);
 8002 
 8003   ins_cost(125);
 8004   format %{ "movss   $dst, $src\t# float stk" %}
 8005   ins_encode %{
 8006     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 8007   %}
 8008   ins_pipe(pipe_slow); // XXX
 8009 %}
 8010 
 8011 // Use the same format since predicate() can not be used here.
 8012 instruct loadSSD(regD dst, stackSlotD src)
 8013 %{
 8014   match(Set dst src);
 8015 
 8016   ins_cost(125);
 8017   format %{ "movsd   $dst, $src\t# double stk" %}
 8018   ins_encode  %{
 8019     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 8020   %}
 8021   ins_pipe(pipe_slow); // XXX
 8022 %}
 8023 
// Prefetch instructions for allocation.
// These must be safe to execute with an invalid address (they cannot fault).
 8026 
 8027 instruct prefetchAlloc( memory mem ) %{
 8028   predicate(AllocatePrefetchInstr==3);
 8029   match(PrefetchAllocation mem);
 8030   ins_cost(125);
 8031 
 8032   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 8033   ins_encode %{
 8034     __ prefetchw($mem$$Address);
 8035   %}
 8036   ins_pipe(ialu_mem);
 8037 %}
 8038 
 8039 instruct prefetchAllocNTA( memory mem ) %{
 8040   predicate(AllocatePrefetchInstr==0);
 8041   match(PrefetchAllocation mem);
 8042   ins_cost(125);
 8043 
 8044   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 8045   ins_encode %{
 8046     __ prefetchnta($mem$$Address);
 8047   %}
 8048   ins_pipe(ialu_mem);
 8049 %}
 8050 
 8051 instruct prefetchAllocT0( memory mem ) %{
 8052   predicate(AllocatePrefetchInstr==1);
 8053   match(PrefetchAllocation mem);
 8054   ins_cost(125);
 8055 
 8056   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8057   ins_encode %{
 8058     __ prefetcht0($mem$$Address);
 8059   %}
 8060   ins_pipe(ialu_mem);
 8061 %}
 8062 
 8063 instruct prefetchAllocT2( memory mem ) %{
 8064   predicate(AllocatePrefetchInstr==2);
 8065   match(PrefetchAllocation mem);
 8066   ins_cost(125);
 8067 
 8068   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8069   ins_encode %{
 8070     __ prefetcht2($mem$$Address);
 8071   %}
 8072   ins_pipe(ialu_mem);
 8073 %}
 8074 
 8075 //----------Store Instructions-------------------------------------------------
 8076 
 8077 // Store Byte
 8078 instruct storeB(memory mem, rRegI src)
 8079 %{
 8080   match(Set mem (StoreB mem src));
 8081 
 8082   ins_cost(125); // XXX
 8083   format %{ "movb    $mem, $src\t# byte" %}
 8084   ins_encode %{
 8085     __ movb($mem$$Address, $src$$Register);
 8086   %}
 8087   ins_pipe(ialu_mem_reg);
 8088 %}
 8089 
 8090 // Store Char/Short
 8091 instruct storeC(memory mem, rRegI src)
 8092 %{
 8093   match(Set mem (StoreC mem src));
 8094 
 8095   ins_cost(125); // XXX
 8096   format %{ "movw    $mem, $src\t# char/short" %}
 8097   ins_encode %{
 8098     __ movw($mem$$Address, $src$$Register);
 8099   %}
 8100   ins_pipe(ialu_mem_reg);
 8101 %}
 8102 
 8103 // Store Integer
 8104 instruct storeI(memory mem, rRegI src)
 8105 %{
 8106   match(Set mem (StoreI mem src));
 8107 
 8108   ins_cost(125); // XXX
 8109   format %{ "movl    $mem, $src\t# int" %}
 8110   ins_encode %{
 8111     __ movl($mem$$Address, $src$$Register);
 8112   %}
 8113   ins_pipe(ialu_mem_reg);
 8114 %}
 8115 
 8116 // Store Long
 8117 instruct storeL(memory mem, rRegL src)
 8118 %{
 8119   match(Set mem (StoreL mem src));
 8120 
 8121   ins_cost(125); // XXX
 8122   format %{ "movq    $mem, $src\t# long" %}
 8123   ins_encode %{
 8124     __ movq($mem$$Address, $src$$Register);
 8125   %}
 8126   ins_pipe(ialu_mem_reg); // XXX
 8127 %}
 8128 
 8129 // Store Pointer
 8130 instruct storeP(memory mem, any_RegP src)
 8131 %{
 8132   predicate(n->as_Store()->barrier_data() == 0);
 8133   match(Set mem (StoreP mem src));
 8134 
 8135   ins_cost(125); // XXX
 8136   format %{ "movq    $mem, $src\t# ptr" %}
 8137   ins_encode %{
 8138     __ movq($mem$$Address, $src$$Register);
 8139   %}
 8140   ins_pipe(ialu_mem_reg);
 8141 %}
 8142 
 8143 instruct storeImmP0(memory mem, immP0 zero)
 8144 %{
 8145   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8146   match(Set mem (StoreP mem zero));
 8147 
 8148   ins_cost(125); // XXX
 8149   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8150   ins_encode %{
 8151     __ movq($mem$$Address, r12);
 8152   %}
 8153   ins_pipe(ialu_mem_reg);
 8154 %}
 8155 
 8156 // Store Null Pointer, mark word, or other simple pointer constant.
 8157 instruct storeImmP(memory mem, immP31 src)
 8158 %{
 8159   predicate(n->as_Store()->barrier_data() == 0);
 8160   match(Set mem (StoreP mem src));
 8161 
 8162   ins_cost(150); // XXX
 8163   format %{ "movq    $mem, $src\t# ptr" %}
 8164   ins_encode %{
 8165     __ movq($mem$$Address, $src$$constant);
 8166   %}
 8167   ins_pipe(ialu_mem_imm);
 8168 %}
 8169 
 8170 // Store Compressed Pointer
 8171 instruct storeN(memory mem, rRegN src)
 8172 %{
 8173   predicate(n->as_Store()->barrier_data() == 0);
 8174   match(Set mem (StoreN mem src));
 8175 
 8176   ins_cost(125); // XXX
 8177   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8178   ins_encode %{
 8179     __ movl($mem$$Address, $src$$Register);
 8180   %}
 8181   ins_pipe(ialu_mem_reg);
 8182 %}
 8183 
 8184 instruct storeNKlass(memory mem, rRegN src)
 8185 %{
 8186   match(Set mem (StoreNKlass mem src));
 8187 
 8188   ins_cost(125); // XXX
 8189   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8190   ins_encode %{
 8191     __ movl($mem$$Address, $src$$Register);
 8192   %}
 8193   ins_pipe(ialu_mem_reg);
 8194 %}
 8195 
 8196 instruct storeImmN0(memory mem, immN0 zero)
 8197 %{
 8198   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8199   match(Set mem (StoreN mem zero));
 8200 
 8201   ins_cost(125); // XXX
 8202   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8203   ins_encode %{
 8204     __ movl($mem$$Address, r12);
 8205   %}
 8206   ins_pipe(ialu_mem_reg);
 8207 %}
 8208 
 8209 instruct storeImmN(memory mem, immN src)
 8210 %{
 8211   predicate(n->as_Store()->barrier_data() == 0);
 8212   match(Set mem (StoreN mem src));
 8213 
 8214   ins_cost(150); // XXX
 8215   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8216   ins_encode %{
 8217     address con = (address)$src$$constant;
 8218     if (con == nullptr) {
 8219       __ movl($mem$$Address, 0);
 8220     } else {
 8221       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8222     }
 8223   %}
 8224   ins_pipe(ialu_mem_imm);
 8225 %}
 8226 
 8227 instruct storeImmNKlass(memory mem, immNKlass src)
 8228 %{
 8229   match(Set mem (StoreNKlass mem src));
 8230 
 8231   ins_cost(150); // XXX
 8232   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8233   ins_encode %{
 8234     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8235   %}
 8236   ins_pipe(ialu_mem_imm);
 8237 %}
 8238 
 8239 // Store Integer Immediate
 8240 instruct storeImmI0(memory mem, immI_0 zero)
 8241 %{
 8242   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8243   match(Set mem (StoreI mem zero));
 8244 
 8245   ins_cost(125); // XXX
 8246   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8247   ins_encode %{
 8248     __ movl($mem$$Address, r12);
 8249   %}
 8250   ins_pipe(ialu_mem_reg);
 8251 %}
 8252 
 8253 instruct storeImmI(memory mem, immI src)
 8254 %{
 8255   match(Set mem (StoreI mem src));
 8256 
 8257   ins_cost(150);
 8258   format %{ "movl    $mem, $src\t# int" %}
 8259   ins_encode %{
 8260     __ movl($mem$$Address, $src$$constant);
 8261   %}
 8262   ins_pipe(ialu_mem_imm);
 8263 %}
 8264 
 8265 // Store Long Immediate
 8266 instruct storeImmL0(memory mem, immL0 zero)
 8267 %{
        // Store long zero by writing the full 64-bit r12. Selected only with
        // compressed oops enabled and a null heap base; per the format
        // string, r12 (R12_heapbase) is zero in that configuration.
 8268   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8269   match(Set mem (StoreL mem zero));
 8270
 8271   ins_cost(125); // XXX
 8272   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8273   ins_encode %{
 8274     __ movq($mem$$Address, r12);
 8275   %}
 8276   ins_pipe(ialu_mem_reg);
 8277 %}
 8278 
 8279 instruct storeImmL(memory mem, immL32 src)
 8280 %{
        // Store a long constant to memory. The operand class is immL32, so
        // only constants accepted by that operand are matched here.
 8281   match(Set mem (StoreL mem src));
 8282
 8283   ins_cost(150);
 8284   format %{ "movq    $mem, $src\t# long" %}
 8285   ins_encode %{
 8286     __ movq($mem$$Address, $src$$constant);
 8287   %}
 8288   ins_pipe(ialu_mem_imm);
 8289 %}
 8290 
 8291 // Store Short/Char Immediate
 8292 instruct storeImmC0(memory mem, immI_0 zero)
 8293 %{
        // Store a 16-bit zero (short/char) by writing the low word of r12.
        // Selected only with compressed oops enabled and a null heap base;
        // per the format string, r12 (R12_heapbase) is zero then.
 8294   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8295   match(Set mem (StoreC mem zero));
 8296
 8297   ins_cost(125); // XXX
 8298   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8299   ins_encode %{
 8300     __ movw($mem$$Address, r12);
 8301   %}
 8302   ins_pipe(ialu_mem_reg);
 8303 %}
 8304 
 8305 instruct storeImmI16(memory mem, immI16 src)
 8306 %{
        // Store a 16-bit constant (short/char) to memory with a word-sized
        // move-immediate. Gated on the UseStoreImmI16 flag.
 8307   predicate(UseStoreImmI16);
 8308   match(Set mem (StoreC mem src));
 8309
 8310   ins_cost(150);
 8311   format %{ "movw    $mem, $src\t# short/char" %}
 8312   ins_encode %{
 8313     __ movw($mem$$Address, $src$$constant);
 8314   %}
 8315   ins_pipe(ialu_mem_imm);
 8316 %}
 8317 
 8318 // Store Byte Immediate
 8319 instruct storeImmB0(memory mem, immI_0 zero)
 8320 %{
        // Store a zero byte by writing the low byte of r12. Selected only
        // with compressed oops enabled and a null heap base; per the format
        // string, r12 (R12_heapbase) is zero in that configuration.
        // The disassembly comment says "byte" (not "short/char") so the
        // printed text matches the StoreB node this rule implements.
 8321   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8322   match(Set mem (StoreB mem zero));
 8323
 8324   ins_cost(125); // XXX
 8325   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8326   ins_encode %{
 8327     __ movb($mem$$Address, r12);
 8328   %}
 8329   ins_pipe(ialu_mem_reg);
 8330 %}
 8331 
 8332 instruct storeImmB(memory mem, immI8 src)
 8333 %{
        // Store an 8-bit constant to memory with a byte move-immediate.
 8334   match(Set mem (StoreB mem src));
 8335
 8336   ins_cost(150); // XXX
 8337   format %{ "movb    $mem, $src\t# byte" %}
 8338   ins_encode %{
 8339     __ movb($mem$$Address, $src$$constant);
 8340   %}
 8341   ins_pipe(ialu_mem_imm);
 8342 %}
 8343 
 8344 // Store Float
 8345 instruct storeF(memory mem, regF src)
 8346 %{
        // Store a float held in an XMM register to memory via movflt.
 8347   match(Set mem (StoreF mem src));
 8348
 8349   ins_cost(95); // XXX
 8350   format %{ "movss   $mem, $src\t# float" %}
 8351   ins_encode %{
 8352     __ movflt($mem$$Address, $src$$XMMRegister);
 8353   %}
 8354   ins_pipe(pipe_slow); // XXX
 8355 %}
 8356 
 8357 // Store immediate Float value (it is faster than store from XMM register)
 8358 instruct storeF0(memory mem, immF0 zero)
 8359 %{
        // Store float 0.0 as a 32-bit integer store of r12. Selected only
        // with compressed oops enabled and a null heap base; per the format
        // string, r12 (R12_heapbase) is zero then. Cost 25 is lower than
        // both the XMM store (95) and the float-immediate store (50).
 8360   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8361   match(Set mem (StoreF mem zero));
 8362
 8363   ins_cost(25); // XXX
 8364   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8365   ins_encode %{
 8366     __ movl($mem$$Address, r12);
 8367   %}
 8368   ins_pipe(ialu_mem_reg);
 8369 %}
 8370 
 8371 instruct storeF_imm(memory mem, immF src)
 8372 %{
        // Store a float constant without going through an XMM register:
        // the constant's bit pattern (jint_cast) is written with a 32-bit
        // integer move-immediate.
 8373   match(Set mem (StoreF mem src));
 8374
 8375   ins_cost(50);
 8376   format %{ "movl    $mem, $src\t# float" %}
 8377   ins_encode %{
 8378     __ movl($mem$$Address, jint_cast($src$$constant));
 8379   %}
 8380   ins_pipe(ialu_mem_imm);
 8381 %}
 8382 
 8383 // Store Double
 8384 instruct storeD(memory mem, regD src)
 8385 %{
        // Store a double held in an XMM register to memory via movdbl.
 8386   match(Set mem (StoreD mem src));
 8387
 8388   ins_cost(95); // XXX
 8389   format %{ "movsd   $mem, $src\t# double" %}
 8390   ins_encode %{
 8391     __ movdbl($mem$$Address, $src$$XMMRegister);
 8392   %}
 8393   ins_pipe(pipe_slow); // XXX
 8394 %}
 8395 
 8396 // Store immediate double 0.0 (it is faster than store from XMM register)
 8397 instruct storeD0_imm(memory mem, immD0 src)
 8398 %{
        // Store double 0.0 with a 64-bit integer move-immediate. The
        // predicate is the exact complement of the one on storeD0, so this
        // form is used whenever the r12-based zero store is not available.
 8399   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8400   match(Set mem (StoreD mem src));
 8401
 8402   ins_cost(50);
 8403   format %{ "movq    $mem, $src\t# double 0." %}
 8404   ins_encode %{
 8405     __ movq($mem$$Address, $src$$constant);
 8406   %}
 8407   ins_pipe(ialu_mem_imm);
 8408 %}
 8409 
 8410 instruct storeD0(memory mem, immD0 zero)
 8411 %{
        // Store double 0.0 as a 64-bit integer store of r12. Selected only
        // with compressed oops enabled and a null heap base; per the format
        // string, r12 (R12_heapbase) is zero in that configuration.
 8412   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8413   match(Set mem (StoreD mem zero));
 8414
 8415   ins_cost(25); // XXX
 8416   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8417   ins_encode %{
 8418     __ movq($mem$$Address, r12);
 8419   %}
 8420   ins_pipe(ialu_mem_reg);
 8421 %}
 8422 
 8423 instruct storeSSI(stackSlotI dst, rRegI src)
 8424 %{
        // Copy an int register into an int stack slot (32-bit store).
 8425   match(Set dst src);
 8426
 8427   ins_cost(100);
 8428   format %{ "movl    $dst, $src\t# int stk" %}
 8429   ins_encode %{
 8430     __ movl($dst$$Address, $src$$Register);
 8431   %}
 8432   ins_pipe( ialu_mem_reg );
 8433 %}
 8434 
 8435 instruct storeSSL(stackSlotL dst, rRegL src)
 8436 %{
        // Copy a long register into a long stack slot (64-bit store).
 8437   match(Set dst src);
 8438
 8439   ins_cost(100);
 8440   format %{ "movq    $dst, $src\t# long stk" %}
 8441   ins_encode %{
 8442     __ movq($dst$$Address, $src$$Register);
 8443   %}
 8444   ins_pipe(ialu_mem_reg);
 8445 %}
 8446 
 8447 instruct storeSSP(stackSlotP dst, rRegP src)
 8448 %{
        // Copy a pointer register into a pointer stack slot (64-bit store).
 8449   match(Set dst src);
 8450
 8451   ins_cost(100);
 8452   format %{ "movq    $dst, $src\t# ptr stk" %}
 8453   ins_encode %{
 8454     __ movq($dst$$Address, $src$$Register);
 8455   %}
 8456   ins_pipe(ialu_mem_reg);
 8457 %}
 8458 
 8459 instruct storeSSF(stackSlotF dst, regF src)
 8460 %{
        // Copy a float XMM register into a float stack slot.
 8461   match(Set dst src);
 8462
 8463   ins_cost(95); // XXX
 8464   format %{ "movss   $dst, $src\t# float stk" %}
 8465   ins_encode %{
          // The slot address is built explicitly as rsp + displacement,
          // unlike the integer stack-slot stores, which use $dst$$Address.
 8466     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8467   %}
 8468   ins_pipe(pipe_slow); // XXX
 8469 %}
 8470 
 8471 instruct storeSSD(stackSlotD dst, regD src)
 8472 %{
        // Copy a double XMM register into a double stack slot.
 8473   match(Set dst src);
 8474
 8475   ins_cost(95); // XXX
 8476   format %{ "movsd   $dst, $src\t# double stk" %}
 8477   ins_encode %{
          // The slot address is built explicitly as rsp + displacement,
          // unlike the integer stack-slot stores, which use $dst$$Address.
 8478     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8479   %}
 8480   ins_pipe(pipe_slow); // XXX
 8481 %}
 8482 
 8483 instruct cacheWB(indirect addr)
 8484 %{
        // Write back the cache line addressed by $addr. Only available when
        // the CPU reports data-cache-line flush support.
 8485   predicate(VM_Version::supports_data_cache_line_flush());
 8486   match(CacheWB addr);
 8487
 8488   ins_cost(100);
 8489   format %{"cache wb $addr" %}
 8490   ins_encode %{
          // The operand must be a bare base register: no index register and
          // no displacement.
 8491     assert($addr->index_position() < 0, "should be");
 8492     assert($addr$$disp == 0, "should be");
 8493     __ cache_wb(Address($addr$$base$$Register, 0));
 8494   %}
 8495   ins_pipe(pipe_slow); // XXX
 8496 %}
 8497 
 8498 instruct cacheWBPreSync()
 8499 %{
        // Synchronization emitted before a sequence of cache write-backs;
        // calls cache_wbsync with its flag set to true (the "pre" variant).
 8500   predicate(VM_Version::supports_data_cache_line_flush());
 8501   match(CacheWBPreSync);
 8502
 8503   ins_cost(100);
 8504   format %{"cache wb presync" %}
 8505   ins_encode %{
 8506     __ cache_wbsync(true);
 8507   %}
 8508   ins_pipe(pipe_slow); // XXX
 8509 %}
 8510 
 8511 instruct cacheWBPostSync()
 8512 %{
        // Synchronization emitted after a sequence of cache write-backs;
        // calls cache_wbsync with its flag set to false (the "post" variant).
 8513   predicate(VM_Version::supports_data_cache_line_flush());
 8514   match(CacheWBPostSync);
 8515
 8516   ins_cost(100);
 8517   format %{"cache wb postsync" %}
 8518   ins_encode %{
 8519     __ cache_wbsync(false);
 8520   %}
 8521   ins_pipe(pipe_slow); // XXX
 8522 %}
 8523 
 8524 //----------BSWAP Instructions-------------------------------------------------
 8525 instruct bytes_reverse_int(rRegI dst) %{
        // Reverse the byte order of a 32-bit value in place with bswapl.
 8526   match(Set dst (ReverseBytesI dst));
 8527
 8528   format %{ "bswapl  $dst" %}
 8529   ins_encode %{
 8530     __ bswapl($dst$$Register);
 8531   %}
 8532   ins_pipe( ialu_reg );
 8533 %}
 8534 
 8535 instruct bytes_reverse_long(rRegL dst) %{
        // Reverse the byte order of a 64-bit value in place with bswapq.
 8536   match(Set dst (ReverseBytesL dst));
 8537
 8538   format %{ "bswapq  $dst" %}
 8539   ins_encode %{
 8540     __ bswapq($dst$$Register);
 8541   %}
 8542   ins_pipe( ialu_reg);
 8543 %}
 8544 
 8545 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
        // Byte-swap an unsigned 16-bit value: swap all four bytes of the
        // 32-bit register, then shift the result down 16 bits logically so
        // the upper half is zero. The shift modifies flags, hence KILL cr.
 8546   match(Set dst (ReverseBytesUS dst));
 8547   effect(KILL cr);
 8548
 8549   format %{ "bswapl  $dst\n\t"
 8550             "shrl    $dst,16\n\t" %}
 8551   ins_encode %{
 8552     __ bswapl($dst$$Register);
 8553     __ shrl($dst$$Register, 16);
 8554   %}
 8555   ins_pipe( ialu_reg );
 8556 %}
 8557 
 8558 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
        // Byte-swap a signed 16-bit value: swap all four bytes of the 32-bit
        // register, then shift the result down 16 bits arithmetically so the
        // sign is extended. The shift modifies flags, hence KILL cr.
        // The format prints "sarl" to match the instruction actually emitted
        // (and the "shrl" spelling used by the unsigned-short rule).
 8559   match(Set dst (ReverseBytesS dst));
 8560   effect(KILL cr);
 8561
 8562   format %{ "bswapl  $dst\n\t"
 8563             "sarl    $dst,16\n\t" %}
 8564   ins_encode %{
 8565     __ bswapl($dst$$Register);
 8566     __ sarl($dst$$Register, 16);
 8567   %}
 8568   ins_pipe( ialu_reg );
 8569 %}
 8570 
 8571 //---------- Zeros Count Instructions ------------------------------------------
 8572 
 8573 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Count leading zeros of an int with a single lzcntl. Used when
        // UseCountLeadingZerosInstruction is set; flags are clobbered.
 8574   predicate(UseCountLeadingZerosInstruction);
 8575   match(Set dst (CountLeadingZerosI src));
 8576   effect(KILL cr);
 8577
 8578   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8579   ins_encode %{
 8580     __ lzcntl($dst$$Register, $src$$Register);
 8581   %}
 8582   ins_pipe(ialu_reg);
 8583 %}
 8584 
 8585 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form of countLeadingZerosI: folds the LoadI into
        // the lzcntl so no separate load is emitted.
 8586   predicate(UseCountLeadingZerosInstruction);
 8587   match(Set dst (CountLeadingZerosI (LoadI src)));
 8588   effect(KILL cr);
 8589   ins_cost(175);
 8590   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8591   ins_encode %{
 8592     __ lzcntl($dst$$Register, $src$$Address);
 8593   %}
 8594   ins_pipe(ialu_reg_mem);
 8595 %}
 8596 
 8597 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback for CPUs without lzcnt: derive the leading-zero count of
        // an int from the bit index produced by bsrl.
 8598   predicate(!UseCountLeadingZerosInstruction);
 8599   match(Set dst (CountLeadingZerosI src));
 8600   effect(KILL cr);
 8601
 8602   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8603             "jnz     skip\n\t"
 8604             "movl    $dst, -1\n"
 8605       "skip:\n\t"
 8606             "negl    $dst\n\t"
 8607             "addl    $dst, 31" %}
 8608   ins_encode %{
 8609     Register result = $dst$$Register;
 8610     Register value = $src$$Register;
 8611     Label have_index;
 8612     __ bsrl(result, value);
          // Zero input: substitute index -1 so the arithmetic below yields
          // the full bit width.
 8613     __ jccb(Assembler::notZero, have_index);
 8614     __ movl(result, -1);
 8615     __ bind(have_index);
          // result = (BitsPerInt - 1) - index
 8616     __ negl(result);
 8617     __ addl(result, BitsPerInt - 1);
 8618   %}
 8619   ins_pipe(ialu_reg);
 8620 %}
 8621 
 8622 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Count leading zeros of a long with a single lzcntq. The result is
        // an int register; flags are clobbered.
 8623   predicate(UseCountLeadingZerosInstruction);
 8624   match(Set dst (CountLeadingZerosL src));
 8625   effect(KILL cr);
 8626
 8627   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8628   ins_encode %{
 8629     __ lzcntq($dst$$Register, $src$$Register);
 8630   %}
 8631   ins_pipe(ialu_reg);
 8632 %}
 8633 
 8634 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form of countLeadingZerosL: folds the LoadL into
        // the lzcntq so no separate load is emitted.
 8635   predicate(UseCountLeadingZerosInstruction);
 8636   match(Set dst (CountLeadingZerosL (LoadL src)));
 8637   effect(KILL cr);
 8638   ins_cost(175);
 8639   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8640   ins_encode %{
 8641     __ lzcntq($dst$$Register, $src$$Address);
 8642   %}
 8643   ins_pipe(ialu_reg_mem);
 8644 %}
 8645 
 8646 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback for CPUs without lzcnt: derive the leading-zero count of
        // a long from the bit index produced by bsrq.
 8647   predicate(!UseCountLeadingZerosInstruction);
 8648   match(Set dst (CountLeadingZerosL src));
 8649   effect(KILL cr);
 8650
 8651   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8652             "jnz     skip\n\t"
 8653             "movl    $dst, -1\n"
 8654       "skip:\n\t"
 8655             "negl    $dst\n\t"
 8656             "addl    $dst, 63" %}
 8657   ins_encode %{
 8658     Register result = $dst$$Register;
 8659     Register value = $src$$Register;
 8660     Label have_index;
 8661     __ bsrq(result, value);
          // Zero input: substitute index -1 so the arithmetic below yields
          // the full bit width.
 8662     __ jccb(Assembler::notZero, have_index);
 8663     __ movl(result, -1);
 8664     __ bind(have_index);
          // result = (BitsPerLong - 1) - index
 8665     __ negl(result);
 8666     __ addl(result, BitsPerLong - 1);
 8667   %}
 8668   ins_pipe(ialu_reg);
 8669 %}
 8670 
 8671 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Count trailing zeros of an int with a single tzcntl. Used when
        // UseCountTrailingZerosInstruction is set; flags are clobbered.
 8672   predicate(UseCountTrailingZerosInstruction);
 8673   match(Set dst (CountTrailingZerosI src));
 8674   effect(KILL cr);
 8675
 8676   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8677   ins_encode %{
 8678     __ tzcntl($dst$$Register, $src$$Register);
 8679   %}
 8680   ins_pipe(ialu_reg);
 8681 %}
 8682 
 8683 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form of countTrailingZerosI: folds the LoadI into
        // the tzcntl so no separate load is emitted.
 8684   predicate(UseCountTrailingZerosInstruction);
 8685   match(Set dst (CountTrailingZerosI (LoadI src)));
 8686   effect(KILL cr);
 8687   ins_cost(175);
 8688   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8689   ins_encode %{
 8690     __ tzcntl($dst$$Register, $src$$Address);
 8691   %}
 8692   ins_pipe(ialu_reg_mem);
 8693 %}
 8694 
 8695 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback for CPUs without tzcnt: bsfl yields the trailing-zero
        // count directly for non-zero input; zero input is patched to 32.
 8696   predicate(!UseCountTrailingZerosInstruction);
 8697   match(Set dst (CountTrailingZerosI src));
 8698   effect(KILL cr);
 8699
 8700   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8701             "jnz     done\n\t"
 8702             "movl    $dst, 32\n"
 8703       "done:" %}
 8704   ins_encode %{
 8705     Register result = $dst$$Register;
 8706     Label nonzero_input;
 8707     __ bsfl(result, $src$$Register);
 8708     __ jccb(Assembler::notZero, nonzero_input);
          // Input was zero: the answer is the full bit width.
 8709     __ movl(result, BitsPerInt);
 8710     __ bind(nonzero_input);
 8711   %}
 8712   ins_pipe(ialu_reg);
 8713 %}
 8714 
 8715 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Count trailing zeros of a long with a single tzcntq. The result is
        // an int register; flags are clobbered.
 8716   predicate(UseCountTrailingZerosInstruction);
 8717   match(Set dst (CountTrailingZerosL src));
 8718   effect(KILL cr);
 8719
 8720   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8721   ins_encode %{
 8722     __ tzcntq($dst$$Register, $src$$Register);
 8723   %}
 8724   ins_pipe(ialu_reg);
 8725 %}
 8726 
 8727 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form of countTrailingZerosL: folds the LoadL into
        // the tzcntq so no separate load is emitted.
 8728   predicate(UseCountTrailingZerosInstruction);
 8729   match(Set dst (CountTrailingZerosL (LoadL src)));
 8730   effect(KILL cr);
 8731   ins_cost(175);
 8732   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8733   ins_encode %{
 8734     __ tzcntq($dst$$Register, $src$$Address);
 8735   %}
 8736   ins_pipe(ialu_reg_mem);
 8737 %}
 8738 
 8739 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback for CPUs without tzcnt: bsfq yields the trailing-zero
        // count directly for non-zero input; zero input is patched to 64.
 8740   predicate(!UseCountTrailingZerosInstruction);
 8741   match(Set dst (CountTrailingZerosL src));
 8742   effect(KILL cr);
 8743
 8744   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8745             "jnz     done\n\t"
 8746             "movl    $dst, 64\n"
 8747       "done:" %}
 8748   ins_encode %{
 8749     Register result = $dst$$Register;
 8750     Label nonzero_input;
 8751     __ bsfq(result, $src$$Register);
 8752     __ jccb(Assembler::notZero, nonzero_input);
          // Input was zero: the answer is the full bit width.
 8753     __ movl(result, BitsPerLong);
 8754     __ bind(nonzero_input);
 8755   %}
 8756   ins_pipe(ialu_reg);
 8757 %}
 8758 
 8759 //--------------- Reverse Operation Instructions ----------------
 8760 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
        // Bit-reverse an int on CPUs without GFNI. reverseI is called with
        // no XMM temporaries (xnoreg) and one general-purpose temporary.
 8761   predicate(!VM_Version::supports_gfni());
 8762   match(Set dst (ReverseI src));
 8763   effect(TEMP dst, TEMP rtmp, KILL cr);
 8764   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8765   ins_encode %{
 8766     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8767   %}
 8768   ins_pipe( ialu_reg );
 8769 %}
 8770 
 8771 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // Bit-reverse an int on CPUs with GFNI. reverseI receives two XMM
        // temporaries plus one general-purpose temporary.
 8772   predicate(VM_Version::supports_gfni());
 8773   match(Set dst (ReverseI src));
 8774   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8775   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8776   ins_encode %{
 8777     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8778   %}
 8779   ins_pipe( ialu_reg );
 8780 %}
 8781 
 8782 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
        // Bit-reverse a long on CPUs without GFNI. reverseL is called with
        // no XMM temporaries (xnoreg) and two general-purpose temporaries.
 8783   predicate(!VM_Version::supports_gfni());
 8784   match(Set dst (ReverseL src));
 8785   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8786   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8787   ins_encode %{
 8788     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8789   %}
 8790   ins_pipe( ialu_reg );
 8791 %}
 8792 
 8793 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // Bit-reverse a long on CPUs with GFNI. reverseL receives two XMM
        // temporaries and one general-purpose temporary; the second
        // general-purpose temporary slot is passed as noreg.
 8794   predicate(VM_Version::supports_gfni());
 8795   match(Set dst (ReverseL src));
 8796   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8797   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8798   ins_encode %{
 8799     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8800   %}
 8801   ins_pipe( ialu_reg );
 8802 %}
 8803 
 8804 //---------- Population Count Instructions -------------------------------------
 8805 
 8806 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Population count of an int via popcntl. Gated on
        // UsePopCountInstruction; flags are clobbered.
 8807   predicate(UsePopCountInstruction);
 8808   match(Set dst (PopCountI src));
 8809   effect(KILL cr);
 8810
 8811   format %{ "popcnt  $dst, $src" %}
 8812   ins_encode %{
 8813     __ popcntl($dst$$Register, $src$$Register);
 8814   %}
 8815   ins_pipe(ialu_reg);
 8816 %}
 8817 
 8818 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // Memory-operand form of popCountI: folds the LoadI into popcntl.
        // NOTE(review): this uses ins_pipe(ialu_reg) although the source is
        // a memory operand; the lzcnt/tzcnt memory forms in this file use
        // ialu_reg_mem -- confirm whether the difference is intentional.
 8819   predicate(UsePopCountInstruction);
 8820   match(Set dst (PopCountI (LoadI mem)));
 8821   effect(KILL cr);
 8822
 8823   format %{ "popcnt  $dst, $mem" %}
 8824   ins_encode %{
 8825     __ popcntl($dst$$Register, $mem$$Address);
 8826   %}
 8827   ins_pipe(ialu_reg);
 8828 %}
 8829 
 8830 // Note: Long.bitCount(long) returns an int.
 8831 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Population count of a long via popcntq; the destination is an int
        // register. Gated on UsePopCountInstruction; flags are clobbered.
 8832   predicate(UsePopCountInstruction);
 8833   match(Set dst (PopCountL src));
 8834   effect(KILL cr);
 8835
 8836   format %{ "popcnt  $dst, $src" %}
 8837   ins_encode %{
 8838     __ popcntq($dst$$Register, $src$$Register);
 8839   %}
 8840   ins_pipe(ialu_reg);
 8841 %}
 8842 
 8843 // Note: Long.bitCount(long) returns an int.
 8844 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // Memory-operand form of popCountL: folds the LoadL into popcntq.
        // NOTE(review): this uses ins_pipe(ialu_reg) although the source is
        // a memory operand; the lzcnt/tzcnt memory forms in this file use
        // ialu_reg_mem -- confirm whether the difference is intentional.
 8845   predicate(UsePopCountInstruction);
 8846   match(Set dst (PopCountL (LoadL mem)));
 8847   effect(KILL cr);
 8848
 8849   format %{ "popcnt  $dst, $mem" %}
 8850   ins_encode %{
 8851     __ popcntq($dst$$Register, $mem$$Address);
 8852   %}
 8853   ins_pipe(ialu_reg);
 8854 %}
 8855 
 8856 
 8857 //----------MemBar Instructions-----------------------------------------------
 8858 // Memory barrier flavors
 8859 
 8860 instruct membar_acquire()
 8861 %{
        // Acquire barrier, also used for LoadFence. Emits no code: size is
        // 0 and the encoding is empty.
 8862   match(MemBarAcquire);
 8863   match(LoadFence);
 8864   ins_cost(0);
 8865
 8866   size(0);
 8867   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8868   ins_encode();
 8869   ins_pipe(empty);
 8870 %}
 8871 
 8872 instruct membar_acquire_lock()
 8873 %{
        // Acquire barrier associated with lock acquisition. Emits no code;
        // the format text attributes this to the CMPXCHG already performed
        // by the preceding FastLock.
 8874   match(MemBarAcquireLock);
 8875   ins_cost(0);
 8876
 8877   size(0);
 8878   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8879   ins_encode();
 8880   ins_pipe(empty);
 8881 %}
 8882 
 8883 instruct membar_release()
 8884 %{
        // Release barrier, also used for StoreFence. Emits no code: size is
        // 0 and the encoding is empty.
 8885   match(MemBarRelease);
 8886   match(StoreFence);
 8887   ins_cost(0);
 8888
 8889   size(0);
 8890   format %{ "MEMBAR-release ! (empty encoding)" %}
 8891   ins_encode();
 8892   ins_pipe(empty);
 8893 %}
 8894 
 8895 instruct membar_release_lock()
 8896 %{
        // Release barrier associated with lock release. Emits no code; the
        // format text attributes this to the FastUnlock that follows.
 8897   match(MemBarReleaseLock);
 8898   ins_cost(0);
 8899
 8900   size(0);
 8901   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8902   ins_encode();
 8903   ins_pipe(empty);
 8904 %}
 8905 
 8906 instruct membar_storeload(rFlagsReg cr) %{
        // StoreLoad barrier. Delegates to MacroAssembler::membar with
        // Assembler::StoreLoad; the format text shows it as a locked add to
        // the top of the stack, which is why flags are declared killed.
 8907   match(MemBarStoreLoad);
 8908   effect(KILL cr);
 8909   ins_cost(400);
 8910
 8911   format %{
 8912     $$template
 8913     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8914   %}
 8915   ins_encode %{
 8916     __ membar(Assembler::StoreLoad);
 8917   %}
 8918   ins_pipe(pipe_slow);
 8919 %}
 8920 
 8921 instruct membar_volatile(rFlagsReg cr) %{
        // Volatile barrier. Delegates to MacroAssembler::membar with
        // Assembler::StoreLoad; the format text shows it as a locked add to
        // the top of the stack, which is why flags are declared killed.
        // Cost 400 lets the zero-cost unnecessary_membar_volatile rule win
        // whenever its predicate holds.
 8922   match(MemBarVolatile);
 8923   effect(KILL cr);
 8924   ins_cost(400);
 8925
 8926   format %{
 8927     $$template
 8928     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8929   %}
 8930   ins_encode %{
 8931     __ membar(Assembler::StoreLoad);
 8932   %}
 8933   ins_pipe(pipe_slow);
 8934 %}
 8935 
 8936 instruct unnecessary_membar_volatile()
 8937 %{
        // Elided volatile barrier: when Matcher::post_store_load_barrier(n)
        // reports true, the MemBarVolatile is matched at zero cost and
        // emits no code.
 8938   match(MemBarVolatile);
 8939   predicate(Matcher::post_store_load_barrier(n));
 8940   ins_cost(0);
 8941
 8942   size(0);
 8943   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8944   ins_encode();
 8945   ins_pipe(empty);
 8946 %}
 8947 
 8948 instruct membar_full(rFlagsReg cr) %{
        // Full barrier. Delegates to MacroAssembler::membar with
        // Assembler::StoreLoad, the same encoding used by the storeload and
        // volatile barrier rules; flags are declared killed.
 8949   match(MemBarFull);
 8950   effect(KILL cr);
 8951   ins_cost(400);
 8952
 8953   format %{
 8954     $$template
 8955     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8956   %}
 8957   ins_encode %{
 8958     __ membar(Assembler::StoreLoad);
 8959   %}
 8960   ins_pipe(pipe_slow);
 8961 %}
 8962 
 8963 instruct membar_storestore() %{
        // StoreStore barrier, also used for StoreStoreFence. Emits no code:
        // size is 0 and the encoding is empty.
 8964   match(MemBarStoreStore);
 8965   match(StoreStoreFence);
 8966   ins_cost(0);
 8967
 8968   size(0);
 8969   format %{ "MEMBAR-storestore (empty encoding)" %}
 8970   ins_encode( );
 8971   ins_pipe(empty);
 8972 %}
 8973 
 8974 //----------Move Instructions--------------------------------------------------
 8975 
 8976 instruct castX2P(rRegP dst, rRegL src)
 8977 %{
        // Reinterpret a long as a pointer. This is a plain register copy,
        // and is skipped entirely when both operands were assigned the same
        // register.
 8978   match(Set dst (CastX2P src));
 8979
 8980   format %{ "movq    $dst, $src\t# long->ptr" %}
 8981   ins_encode %{
 8982     if ($dst$$reg != $src$$reg) {
 8983       __ movptr($dst$$Register, $src$$Register);
 8984     }
 8985   %}
 8986   ins_pipe(ialu_reg_reg); // XXX
 8987 %}
 8988 
 8989 instruct castI2N(rRegN dst, rRegI src)
 8990 %{
        // Reinterpret an int as a narrow (compressed) pointer. This is a
        // 32-bit register copy, skipped when both operands share a register.
        // The format prints "movl" to match the instruction actually emitted.
 8991   match(Set dst (CastI2N src));
 8992
 8993   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 8994   ins_encode %{
 8995     if ($dst$$reg != $src$$reg) {
 8996       __ movl($dst$$Register, $src$$Register);
 8997     }
 8998   %}
 8999   ins_pipe(ialu_reg_reg); // XXX
 9000 %}
 9001 
 9002 instruct castN2X(rRegL dst, rRegN src)
 9003 %{
        // CastP2X whose input lives in a narrow-pointer register class
        // (rRegN). This is a plain register copy, skipped when both operands
        // share a register.
        // NOTE(review): the format text is identical to castP2X ("ptr ->
        // long") even though the source operand is rRegN -- confirm whether
        // it should say "narrow ptr".
 9004   match(Set dst (CastP2X src));
 9005
 9006   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9007   ins_encode %{
 9008     if ($dst$$reg != $src$$reg) {
 9009       __ movptr($dst$$Register, $src$$Register);
 9010     }
 9011   %}
 9012   ins_pipe(ialu_reg_reg); // XXX
 9013 %}
 9014 
 9015 instruct castP2X(rRegL dst, rRegP src)
 9016 %{
        // Reinterpret a pointer as a long. This is a plain register copy,
        // and is skipped entirely when both operands were assigned the same
        // register.
 9017   match(Set dst (CastP2X src));
 9018
 9019   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9020   ins_encode %{
 9021     if ($dst$$reg != $src$$reg) {
 9022       __ movptr($dst$$Register, $src$$Register);
 9023     }
 9024   %}
 9025   ins_pipe(ialu_reg_reg); // XXX
 9026 %}
 9027 
 9028 // Convert oop into int for vector alignment masking
 9029 instruct convP2I(rRegI dst, rRegP src)
 9030 %{
        // Collapses ConvL2I(CastP2X ptr) into one 32-bit register move that
        // keeps only the low 32 bits of the pointer.
 9031   match(Set dst (ConvL2I (CastP2X src)));
 9032
 9033   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9034   ins_encode %{
 9035     __ movl($dst$$Register, $src$$Register);
 9036   %}
 9037   ins_pipe(ialu_reg_reg); // XXX
 9038 %}
 9039 
 9040 // Convert compressed oop into int for vector alignment masking
 9041 // when compressed oops are unshifted 32-bit values (heap < 4GB).
 9042 instruct convN2I(rRegI dst, rRegN src)
 9043 %{
        // Collapses ConvL2I(CastP2X(DecodeN narrow)) into one 32-bit move of
        // the narrow value itself. Only selected when the compressed-oop
        // shift is 0, so no decode step is emitted.
 9044   predicate(CompressedOops::shift() == 0);
 9045   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 9046
 9047   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 9048   ins_encode %{
 9049     __ movl($dst$$Register, $src$$Register);
 9050   %}
 9051   ins_pipe(ialu_reg_reg); // XXX
 9052 %}
 9053 
 9054 // Convert oop pointer into compressed form
 9055 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
        // Compress an oop whose type is not known to be non-null.
 9056   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 9057   match(Set dst (EncodeP src));
 9058   effect(KILL cr);
 9059   format %{ "encode_heap_oop $dst,$src" %}
 9060   ins_encode %{
 9061     Register wide_oop = $src$$Register;
 9062     Register narrow_oop = $dst$$Register;
          // encode_heap_oop works in place, so copy the source over first
          // unless the allocator already put both in one register.
 9063     if (wide_oop != narrow_oop) {
 9064       __ movq(narrow_oop, wide_oop);
 9065     }
 9066     __ encode_heap_oop(narrow_oop);
 9067   %}
 9068   ins_pipe(ialu_reg_long);
 9069 %}
 9070 
 9071 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // Compress an oop whose type is known to be non-null, using the
        // two-register encode_heap_oop_not_null helper. The predicate is the
        // complement of the one on encodeHeapOop.
 9072   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 9073   match(Set dst (EncodeP src));
 9074   effect(KILL cr);
 9075   format %{ "encode_heap_oop_not_null $dst,$src" %}
 9076   ins_encode %{
 9077     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 9078   %}
 9079   ins_pipe(ialu_reg_long);
 9080 %}
 9081 
 9082 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
        // Decompress a narrow oop whose type is neither NotNull nor
        // Constant.
 9083   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9084             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9085   match(Set dst (DecodeN src));
 9086   effect(KILL cr);
 9087   format %{ "decode_heap_oop $dst,$src" %}
 9088   ins_encode %{
 9089     Register narrow_oop = $src$$Register;
 9090     Register wide_oop = $dst$$Register;
          // decode_heap_oop works in place, so copy the source over first
          // unless the allocator already put both in one register.
 9091     if (narrow_oop != wide_oop) {
 9092       __ movq(wide_oop, narrow_oop);
 9093     }
 9094     __ decode_heap_oop(wide_oop);
 9095   %}
 9096   ins_pipe(ialu_reg_long);
 9097 %}
 9098 
 9099 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // Decompress a narrow oop whose type is NotNull or Constant.
 9100   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9101             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9102   match(Set dst (DecodeN src));
 9103   effect(KILL cr);
 9104   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9105   ins_encode %{
 9106     Register narrow_oop = $src$$Register;
 9107     Register wide_oop = $dst$$Register;
 9108     if (narrow_oop == wide_oop) {
          // Same register: use the in-place form.
 9109       __ decode_heap_oop_not_null(wide_oop);
 9110     } else {
          // Distinct registers: use the two-operand form.
 9111       __ decode_heap_oop_not_null(wide_oop, narrow_oop);
 9112     }
 9113   %}
 9114   ins_pipe(ialu_reg_long);
 9115 %}
 9116 
 9117 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // Compress a klass pointer via encode_and_move_klass_not_null.
        // dst is additionally declared TEMP and flags are killed.
 9118   match(Set dst (EncodePKlass src));
 9119   effect(TEMP dst, KILL cr);
 9120   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9121   ins_encode %{
 9122     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9123   %}
 9124   ins_pipe(ialu_reg_long);
 9125 %}
 9126 
 9127 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // Decompress a narrow klass pointer via
        // decode_and_move_klass_not_null. dst is additionally declared TEMP
        // and flags are killed.
 9128   match(Set dst (DecodeNKlass src));
 9129   effect(TEMP dst, KILL cr);
 9130   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9131   ins_encode %{
 9132     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9133   %}
 9134   ins_pipe(ialu_reg_long);
 9135 %}
 9136 
 9137 //----------Conditional Move---------------------------------------------------
 9138 // Jump
 9139 // dummy instruction for generating temp registers
 9140 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
        // Table jump where the index is a shifted switch value. The
        // predicate is hard-wired to false, so the matcher never selects
        // this rule as written.
 9141   match(Jump (LShiftL switch_val shift));
 9142   ins_cost(350);
 9143   predicate(false);
 9144   effect(TEMP dest);
 9145
 9146   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9147             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9148   ins_encode %{
 9149     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9150     // to do that and the compiler is using that register as one it can allocate.
 9151     // So we build it all by hand.
 9152     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9153     // ArrayAddress dispatch(table, index);
          // dispatch only names the registers and scale; $dest receives the
          // table address from the lea before the jmp is emitted.
 9154     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9155     __ lea($dest$$Register, $constantaddress);
 9156     __ jmp(dispatch);
 9157   %}
 9158   ins_pipe(pipe_jmp);
 9159 %}
 9160 
 9161 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
        // Table jump where the index is (switch_val << shift) + offset. The
        // shift becomes the address scale factor and the offset becomes the
        // displacement (narrowed to int) of the indirect jump.
 9162   match(Jump (AddL (LShiftL switch_val shift) offset));
 9163   ins_cost(350);
 9164   effect(TEMP dest);
 9165
 9166   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9167             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9168   ins_encode %{
 9169     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9170     // to do that and the compiler is using that register as one it can allocate.
 9171     // So we build it all by hand.
 9172     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9173     // ArrayAddress dispatch(table, index);
          // dispatch only names the registers, scale and displacement; $dest
          // receives the table address from the lea before the jmp is emitted.
 9174     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9175     __ lea($dest$$Register, $constantaddress);
 9176     __ jmp(dispatch);
 9177   %}
 9178   ins_pipe(pipe_jmp);
 9179 %}
 9180 
 9181 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
        // Generic table jump: load the table address into the temporary
        // $dest, then jump indirectly through [$dest + switch_val] with a
        // scale factor of 1.
 9182   match(Jump switch_val);
 9183   ins_cost(350);
 9184   effect(TEMP dest);
 9185
 9186   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9187             "jmp     [$dest + $switch_val]\n\t" %}
 9188   ins_encode %{
 9189     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9190     // to do that and the compiler is using that register as one it can allocate.
 9191     // So we build it all by hand.
 9192     // Address index(noreg, switch_reg, Address::times_1);
 9193     // ArrayAddress dispatch(table, index);
          // dispatch only names the registers; $dest receives the table
          // address from the lea before the jmp is emitted.
 9194     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9195     __ lea($dest$$Register, $constantaddress);
 9196     __ jmp(dispatch);
 9197   %}
 9198   ins_pipe(pipe_jmp);
 9199 %}
 9200 
 9201 // Conditional move
 9202 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9203 %{
        // CMoveI between the constants 1 and 0 (signed condition codes).
        // The predicate requires the second value input to be the int
        // constant 0 and the first is immI_1, so instead of a cmov a single
        // setb on the negated condition materializes the result.
 9204   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9205   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9206
 9207   ins_cost(100); // XXX
 9208   format %{ "setbn$cop $dst\t# signed, int" %}
 9209   ins_encode %{
 9210     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9211     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9212   %}
 9213   ins_pipe(ialu_reg);
 9214 %}
 9215 
 9216 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9217 %{
        // Two-operand int conditional move (signed condition codes): dst is
        // both an input and the result. Used when APX is not enabled.
 9218   predicate(!UseAPX);
 9219   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9220
 9221   ins_cost(200); // XXX
 9222   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9223   ins_encode %{
 9224     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9225   %}
 9226   ins_pipe(pipe_cmov_reg);
 9227 %}
 9228 
 9229 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9230 %{
        // Three-operand int conditional move (signed condition codes) for
        // APX: dst is separate from both sources, emitted via ecmovl.
 9231   predicate(UseAPX);
 9232   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9233
 9234   ins_cost(200);
 9235   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9236   ins_encode %{
 9237     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9238   %}
 9239   ins_pipe(pipe_cmov_reg);
 9240 %}
 9241 
 9242 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9243 %{
        // CMoveI between the constants 1 and 0 (unsigned condition codes).
        // The predicate requires the second value input to be the int
        // constant 0 and the first is immI_1, so instead of a cmov a single
        // setb on the negated condition materializes the result.
 9244   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9245   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9246
 9247   ins_cost(100); // XXX
 9248   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9249   ins_encode %{
 9250     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9251     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9252   %}
 9253   ins_pipe(ialu_reg);
 9254 %}
 9255 
 9256 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
        // Two-operand CMoveI for rFlagsRegU/cmpOpU operands, selected when UseAPX is off.
        // Also the expansion target of cmovI_regUCF, which passes (cop, cr, dst, src) in
        // this operand order.
 9257   predicate(!UseAPX);
 9258   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9259 
 9260   ins_cost(200); // XXX
 9261   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9262   ins_encode %{
 9263     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9264   %}
 9265   ins_pipe(pipe_cmov_reg);
 9266 %}
 9267 
 9268 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
        // Three-operand CMoveI for rFlagsRegU/cmpOpU operands, selected when UseAPX is on;
        // ecmovl receives dst, src1 and src2 as separate registers.
 9269   predicate(UseAPX);
 9270   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9271 
 9272   ins_cost(200);
 9273   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9274   ins_encode %{
 9275     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9276   %}
 9277   ins_pipe(pipe_cmov_reg);
 9278 %}
 9279 
 9280 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9281 %{
        // Same setb special case as cmovI_imm_01, for rFlagsRegUCF/cmpOpUCF operands.
        // Applies only when the second value input (n->in(2)->in(2)) is the int constant 0.
 9282   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9283   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9284 
 9285   ins_cost(100); // XXX
 9286   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9287   ins_encode %{
 9288     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
          // setb is issued with the negated condition (the "setbn" of the format string).
 9289     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9290   %}
 9291   ins_pipe(ialu_reg);
 9292 %}
 9293 
 9294 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9295 %{
        // Same setb special case as cmovI_imm_01, for rFlagsRegUCFE/cmpOpUCFE operands.
        // Applies only when the second value input (n->in(2)->in(2)) is the int constant 0.
 9296   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9297   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9298 
 9299   ins_cost(100); // XXX
 9300   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9301   ins_encode %{
 9302     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
          // setb is issued with the negated condition (the "setbn" of the format string).
 9303     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9304   %}
 9305   ins_pipe(ialu_reg);
 9306 %}
 9307 
 9308 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // CMoveI for rFlagsRegUCF/cmpOpUCF operands. It has no encoding of its own and
        // expands to cmovI_regU with the same operands.
        // NOTE(review): this rule has no UseAPX predicate while its expansion target
        // cmovI_regU is predicated on !UseAPX -- confirm the combination is intended.
 9309   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9310 
 9311   ins_cost(200);
 9312   expand %{
 9313     cmovI_regU(cop, cr, dst, src);
 9314   %}
 9315 %}
 9316 
 9317 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
        // Three-operand CMoveI for rFlagsRegUCFE/cmpOpUCFE operands, encoded with ecmovl.
        // NOTE(review): unlike cmovI_regU_ndd there is no UseAPX predicate here; presumably
        // rFlagsRegUCFE is only produced when ecmovl is available -- confirm.
 9318   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9319 
 9320   ins_cost(200);
 9321   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9322   ins_encode %{
 9323     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9324   %}
 9325   ins_pipe(pipe_cmov_reg);
 9326 %}
 9327 
 9328 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // Matches only when the Bool feeding the CMove tests BoolTest::ne. $cop is not
        // used in the encoding: two fixed cmovl (parity, then notEqual) are emitted, so
        // src is moved into dst if either condition holds.
        // NOTE(review): presumably parity covers the unordered outcome of a float compare -- confirm.
 9329   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9330   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9331 
 9332   ins_cost(200); // XXX
 9333   format %{ "cmovpl  $dst, $src\n\t"
 9334             "cmovnel $dst, $src" %}
 9335   ins_encode %{
 9336     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9337     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9338   %}
 9339   ins_pipe(pipe_cmov_reg);
 9340 %}
 9341 
 9342 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9343 // inputs of the CMove
 9344 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // Matches only when the Bool feeding the CMove tests BoolTest::eq. The value inputs
        // appear as (src dst) in the match rule, and the same parity/notEqual cmovl pair
        // as in cmovI_regUCF2_ne is emitted. dst is additionally declared TEMP.
 9345   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9346   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9347   effect(TEMP dst);
 9348 
 9349   ins_cost(200); // XXX
 9350   format %{ "cmovpl  $dst, $src\n\t"
 9351             "cmovnel $dst, $src" %}
 9352   ins_encode %{
 9353     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9354     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9355   %}
 9356   ins_pipe(pipe_cmov_reg);
 9357 %}
 9358 
 9359 // Conditional move (int) whose second value input is a LoadI folded in as a memory operand
 9360 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
        // dst is both the result and the first value input; cmovl takes $src as an Address.
        // Costed at 250 versus 200 for the register form cmovI_reg.
 9361   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9362 
 9363   ins_cost(250); // XXX
 9364   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9365   ins_encode %{
 9366     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9367   %}
 9368   ins_pipe(pipe_cmov_mem);
 9369 %}
 9370 
 9371 // Conditional move (int, rFlagsRegU/cmpOpU) whose second value input is a folded LoadI
 9372 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9373 %{
        // dst is both the result and the first value input; cmovl takes $src as an Address.
        // Also the expansion target of cmovI_memUCF, which passes (cop, cr, dst, src).
 9374   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9375 
 9376   ins_cost(250); // XXX
 9377   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9378   ins_encode %{
 9379     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9380   %}
 9381   ins_pipe(pipe_cmov_mem);
 9382 %}
 9383 
 9384 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
        // Memory-operand CMoveI for rFlagsRegUCF/cmpOpUCF operands. It has no encoding of
        // its own and expands to cmovI_memU with the same operands.
 9385   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9386 
 9387   ins_cost(250);
 9388   expand %{
 9389     cmovI_memU(cop, cr, dst, src);
 9390   %}
 9391 %}
 9392 
 9393 instruct cmovI_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI dst, memory src) %{
        // Memory-operand CMoveI for rFlagsRegUCFE/cmpOpUCFE operands: dst is both the result
        // and the first value input, and the LoadI is folded into cmovl as an Address.
        // The format annotation reads "signed, unsigned" to agree with the other UCFE rules
        // (e.g. cmovI_imm_01UCFE, cmovI_regUCFE_ndd); it only affects the printed listing.
 9394   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9395 
 9396   ins_cost(250); // XXX
 9397   format %{ "cmovl$cop $dst, $src\t# signed, unsigned, int" %}
 9398   ins_encode %{
 9399     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9400   %}
 9401   ins_pipe(pipe_cmov_mem);
 9402 %}
 9403 
 9404 // Conditional move (compressed pointer), two-operand form
 9405 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9406 %{
        // Selected when UseAPX is off. dst is both the result and the first value input;
        // the 32-bit cmovl is used for rRegN operands.
 9407   predicate(!UseAPX);
 9408   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9409 
 9410   ins_cost(200); // XXX
 9411   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9412   ins_encode %{
 9413     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9414   %}
 9415   ins_pipe(pipe_cmov_reg);
 9416 %}
 9417 
 9418 // Conditional move ndd (compressed pointer), three-operand form
 9419 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9420 %{
        // Selected when UseAPX is on. dst is not one of the value inputs; ecmovl receives
        // dst, src1 and src2 as separate registers.
 9421   predicate(UseAPX);
 9422   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9423 
 9424   ins_cost(200);
 9425   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9426   ins_encode %{
 9427     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9428   %}
 9429   ins_pipe(pipe_cmov_reg);
 9430 %}
 9431 
 9432 // Conditional move (compressed pointer, rFlagsRegU/cmpOpU), two-operand form
 9433 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9434 %{
        // Selected when UseAPX is off. Also the expansion target of cmovN_regUCF, which
        // passes (cop, cr, dst, src) in this operand order.
 9435   predicate(!UseAPX);
 9436   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9437 
 9438   ins_cost(200); // XXX
 9439   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9440   ins_encode %{
 9441     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9442   %}
 9443   ins_pipe(pipe_cmov_reg);
 9444 %}
 9445 
 9446 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // CMoveN for rFlagsRegUCF/cmpOpUCF operands. It has no encoding of its own and
        // expands to cmovN_regU with the same operands.
 9447   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9448 
 9449   ins_cost(200);
 9450   expand %{
 9451     cmovN_regU(cop, cr, dst, src);
 9452   %}
 9453 %}
 9454 
 9455 // Conditional move ndd (compressed pointer, rFlagsRegU/cmpOpU), three-operand form
 9456 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9457 %{
        // Selected when UseAPX is on; ecmovl receives dst, src1 and src2 as separate registers.
 9458   predicate(UseAPX);
 9459   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9460 
 9461   ins_cost(200);
 9462   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9463   ins_encode %{
 9464     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9465   %}
 9466   ins_pipe(pipe_cmov_reg);
 9467 %}
 9468 
 9469 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
        // Three-operand CMoveN for rFlagsRegUCFE/cmpOpUCFE operands, encoded with ecmovl.
        // NOTE(review): unlike cmovN_regU_ndd there is no UseAPX predicate here; presumably
        // rFlagsRegUCFE is only produced when ecmovl is available -- confirm.
 9470   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9471 
 9472   ins_cost(200);
 9473   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9474   ins_encode %{
 9475     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9476   %}
 9477   ins_pipe(pipe_cmov_reg);
 9478 %}
 9479 
 9480 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // Matches only when the Bool feeding the CMove tests BoolTest::ne. $cop is not
        // used in the encoding: two fixed cmovl (parity, then notEqual) are emitted, so
        // src is moved into dst if either condition holds.
        // NOTE(review): presumably parity covers the unordered outcome of a float compare -- confirm.
 9481   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9482   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9483 
 9484   ins_cost(200); // XXX
 9485   format %{ "cmovpl  $dst, $src\n\t"
 9486             "cmovnel $dst, $src" %}
 9487   ins_encode %{
 9488     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9489     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9490   %}
 9491   ins_pipe(pipe_cmov_reg);
 9492 %}
 9493 
 9494 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9495 // inputs of the CMove
 9496 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // Matches only when the Bool feeding the CMove tests BoolTest::eq. The value inputs
        // appear as (src dst) in the match rule, and the same parity/notEqual cmovl pair
        // as in cmovN_regUCF2_ne is emitted.
        // NOTE(review): cmovI_regUCF2_eq additionally declares effect(TEMP dst); this rule
        // does not -- confirm whether the difference is intentional.
 9497   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9498   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9499 
 9500   ins_cost(200); // XXX
 9501   format %{ "cmovpl  $dst, $src\n\t"
 9502             "cmovnel $dst, $src" %}
 9503   ins_encode %{
 9504     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9505     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9506   %}
 9507   ins_pipe(pipe_cmov_reg);
 9508 %}
 9509 
 9510 // Conditional move (pointer), two-operand form
 9511 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9512 %{
        // Selected when UseAPX is off. dst is both the result and the first value input;
        // the 64-bit cmovq is used for rRegP operands.
 9513   predicate(!UseAPX);
 9514   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9515 
 9516   ins_cost(200); // XXX
 9517   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9518   ins_encode %{
 9519     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9520   %}
 9521   ins_pipe(pipe_cmov_reg);  // XXX
 9522 %}
 9523 
 9524 // Conditional move ndd (pointer), three-operand form
 9525 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9526 %{
        // Selected when UseAPX is on. dst is not one of the value inputs; ecmovq receives
        // dst, src1 and src2 as separate registers.
 9527   predicate(UseAPX);
 9528   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9529 
 9530   ins_cost(200);
 9531   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9532   ins_encode %{
 9533     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9534   %}
 9535   ins_pipe(pipe_cmov_reg);
 9536 %}
 9537 
 9538 // Conditional move (pointer, rFlagsRegU/cmpOpU), two-operand form
 9539 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9540 %{
        // Selected when UseAPX is off. Also the expansion target of cmovP_regUCF, which
        // passes (cop, cr, dst, src) in this operand order.
 9541   predicate(!UseAPX);
 9542   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9543 
 9544   ins_cost(200); // XXX
 9545   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9546   ins_encode %{
 9547     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9548   %}
 9549   ins_pipe(pipe_cmov_reg); // XXX
 9550 %}
 9551 
 9552 // Conditional move ndd (pointer, rFlagsRegU/cmpOpU), three-operand form
 9553 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9554 %{
        // Selected when UseAPX is on; ecmovq receives dst, src1 and src2 as separate registers.
 9555   predicate(UseAPX);
 9556   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9557 
 9558   ins_cost(200);
 9559   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9560   ins_encode %{
 9561     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9562   %}
 9563   ins_pipe(pipe_cmov_reg);
 9564 %}
 9565 
 9566 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // CMoveP for rFlagsRegUCF/cmpOpUCF operands. It has no encoding of its own and
        // expands to cmovP_regU with the same operands.
 9567   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9568 
 9569   ins_cost(200);
 9570   expand %{
 9571     cmovP_regU(cop, cr, dst, src);
 9572   %}
 9573 %}
 9574 
 9575 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
        // Three-operand CMoveP for rFlagsRegUCFE/cmpOpUCFE operands, encoded with ecmovq.
        // NOTE(review): unlike cmovP_regU_ndd there is no UseAPX predicate here; presumably
        // rFlagsRegUCFE is only produced when ecmovq is available -- confirm.
 9576   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9577 
 9578   ins_cost(200);
 9579   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9580   ins_encode %{
 9581     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9582   %}
 9583   ins_pipe(pipe_cmov_reg);
 9584 %}
 9585 
 9586 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // Matches only when the Bool feeding the CMove tests BoolTest::ne. $cop is not
        // used in the encoding: two fixed cmovq (parity, then notEqual) are emitted, so
        // src is moved into dst if either condition holds.
        // NOTE(review): presumably parity covers the unordered outcome of a float compare -- confirm.
 9587   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9588   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9589 
 9590   ins_cost(200); // XXX
 9591   format %{ "cmovpq  $dst, $src\n\t"
 9592             "cmovneq $dst, $src" %}
 9593   ins_encode %{
 9594     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9595     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9596   %}
 9597   ins_pipe(pipe_cmov_reg);
 9598 %}
 9599 
 9600 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9601 // inputs of the CMove
 9602 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // Matches only when the Bool feeding the CMove tests BoolTest::eq. The value inputs
        // appear as (src dst) in the match rule, and the same parity/notEqual cmovq pair
        // as in cmovP_regUCF2_ne is emitted.
        // NOTE(review): cmovI_regUCF2_eq additionally declares effect(TEMP dst); this rule
        // does not -- confirm whether the difference is intentional.
 9603   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9604   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9605 
 9606   ins_cost(200); // XXX
 9607   format %{ "cmovpq  $dst, $src\n\t"
 9608             "cmovneq $dst, $src" %}
 9609   ins_encode %{
 9610     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9611     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9612   %}
 9613   ins_pipe(pipe_cmov_reg);
 9614 %}
 9615 
 9616 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9617 %{
        // Long 0/1 special case that emits a single setb instead of a cmov. Applies only
        // when the second value input (n->in(2)->in(2)) is the long constant 0; src is an
        // immL1 operand (presumably the constant 1 -- operand is defined elsewhere).
 9618   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9619   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9620 
 9621   ins_cost(100); // XXX
 9622   format %{ "setbn$cop $dst\t# signed, long" %}
 9623   ins_encode %{
 9624     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
          // setb is issued with the negated condition (the "setbn" of the format string).
 9625     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9626   %}
 9627   ins_pipe(ialu_reg);
 9628 %}
 9629 
 9630 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9631 %{
        // Two-operand CMoveL, selected when UseAPX is off. dst is both the result and the
        // first value input; cmovq is emitted with the condition from $cop.
 9632   predicate(!UseAPX);
 9633   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9634 
 9635   ins_cost(200); // XXX
 9636   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9637   ins_encode %{
 9638     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9639   %}
 9640   ins_pipe(pipe_cmov_reg);  // XXX
 9641 %}
 9642 
 9643 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9644 %{
        // Three-operand CMoveL, selected when UseAPX is on. dst is not one of the value
        // inputs; ecmovq receives dst, src1 and src2 as separate registers.
 9645   predicate(UseAPX);
 9646   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9647 
 9648   ins_cost(200);
 9649   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9650   ins_encode %{
 9651     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9652   %}
 9653   ins_pipe(pipe_cmov_reg);
 9654 %}
 9655 
 9656 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9657 %{
        // CMoveL whose second value input is a LoadL folded in as a memory operand; cmovq
        // takes $src as an Address.
        // NOTE(review): costed at 200, the same as the register form cmovL_reg, whereas
        // cmovI_mem uses 250 -- confirm whether that is intended.
 9658   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9659 
 9660   ins_cost(200); // XXX
 9661   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9662   ins_encode %{
 9663     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9664   %}
 9665   ins_pipe(pipe_cmov_mem);  // XXX
 9666 %}
 9667 
 9668 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9669 %{
        // Same setb special case as cmovL_imm_01, for rFlagsRegU/cmpOpU operands.
        // Applies only when the second value input (n->in(2)->in(2)) is the long constant 0.
 9670   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9671   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9672 
 9673   ins_cost(100); // XXX
 9674   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9675   ins_encode %{
 9676     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
          // setb is issued with the negated condition (the "setbn" of the format string).
 9677     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9678   %}
 9679   ins_pipe(ialu_reg);
 9680 %}
 9681 
 9682 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9683 %{
        // Two-operand CMoveL for rFlagsRegU/cmpOpU operands, selected when UseAPX is off.
        // Also the expansion target of cmovL_regUCF, which passes (cop, cr, dst, src) in
        // this operand order.
 9684   predicate(!UseAPX);
 9685   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9686 
 9687   ins_cost(200); // XXX
 9688   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9689   ins_encode %{
 9690     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9691   %}
 9692   ins_pipe(pipe_cmov_reg); // XXX
 9693 %}
 9694 
 9695 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9696 %{
        // Three-operand CMoveL for rFlagsRegU/cmpOpU operands, selected when UseAPX is on;
        // ecmovq receives dst, src1 and src2 as separate registers.
 9697   predicate(UseAPX);
 9698   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9699 
 9700   ins_cost(200);
 9701   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9702   ins_encode %{
 9703     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9704   %}
 9705   ins_pipe(pipe_cmov_reg);
 9706 %}
 9707 
 9708 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9709 %{
        // Same setb special case as cmovL_imm_01, for rFlagsRegUCF/cmpOpUCF operands.
        // Applies only when the second value input (n->in(2)->in(2)) is the long constant 0.
 9710   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9711   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9712 
 9713   ins_cost(100); // XXX
 9714   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9715   ins_encode %{
 9716     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
          // setb is issued with the negated condition (the "setbn" of the format string).
 9717     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9718   %}
 9719   ins_pipe(ialu_reg);
 9720 %}
 9721 
 9722 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9723 %{
        // Same setb special case as cmovL_imm_01, for rFlagsRegUCFE/cmpOpUCFE operands.
        // Applies only when the second value input (n->in(2)->in(2)) is the long constant 0.
 9724   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9725   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9726 
 9727   ins_cost(100); // XXX
 9728   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9729   ins_encode %{
 9730     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
          // setb is issued with the negated condition (the "setbn" of the format string).
 9731     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9732   %}
 9733   ins_pipe(ialu_reg);
 9734 %}
 9735 
 9736 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // CMoveL for rFlagsRegUCF/cmpOpUCF operands. It has no encoding of its own and
        // expands to cmovL_regU with the same operands.
 9737   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9738 
 9739   ins_cost(200);
 9740   expand %{
 9741     cmovL_regU(cop, cr, dst, src);
 9742   %}
 9743 %}
 9744 
 9745 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9746 %{
        // Three-operand CMoveL for rFlagsRegUCFE/cmpOpUCFE operands, encoded with ecmovq.
        // NOTE(review): unlike cmovL_regU_ndd there is no UseAPX predicate here; presumably
        // rFlagsRegUCFE is only produced when ecmovq is available -- confirm.
 9747   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9748 
 9749   ins_cost(200);
 9750   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9751   ins_encode %{
 9752     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9753   %}
 9754   ins_pipe(pipe_cmov_reg);
 9755 %}
 9756 
 9757 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // Matches only when the Bool feeding the CMove tests BoolTest::ne. $cop is not
        // used in the encoding: two fixed cmovq (parity, then notEqual) are emitted, so
        // src is moved into dst if either condition holds.
        // NOTE(review): presumably parity covers the unordered outcome of a float compare -- confirm.
 9758   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9759   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9760 
 9761   ins_cost(200); // XXX
 9762   format %{ "cmovpq  $dst, $src\n\t"
 9763             "cmovneq $dst, $src" %}
 9764   ins_encode %{
 9765     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9766     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9767   %}
 9768   ins_pipe(pipe_cmov_reg);
 9769 %}
 9770 
 9771 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9772 // inputs of the CMove
 9773 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // Matches only when the Bool feeding the CMove tests BoolTest::eq. The value inputs
        // appear as (src dst) in the match rule, and the same parity/notEqual cmovq pair
        // as in cmovL_regUCF2_ne is emitted.
        // NOTE(review): cmovI_regUCF2_eq additionally declares effect(TEMP dst); this rule
        // does not -- confirm whether the difference is intentional.
 9774   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9775   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9776 
 9777   ins_cost(200); // XXX
 9778   format %{ "cmovpq  $dst, $src\n\t"
 9779             "cmovneq $dst, $src" %}
 9780   ins_encode %{
 9781     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9782     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9783   %}
 9784   ins_pipe(pipe_cmov_reg);
 9785 %}
 9786 
 9787 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9788 %{
        // CMoveL for rFlagsRegU/cmpOpU operands whose second value input is a folded LoadL;
        // cmovq takes $src as an Address. Also the expansion target of cmovL_memUCF, which
        // passes (cop, cr, dst, src) in this operand order.
 9789   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9790 
 9791   ins_cost(200); // XXX
 9792   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9793   ins_encode %{
 9794     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9795   %}
 9796   ins_pipe(pipe_cmov_mem); // XXX
 9797 %}
 9798 
 9799 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
        // Memory-operand CMoveL for rFlagsRegUCF/cmpOpUCF operands. It has no encoding of
        // its own and expands to cmovL_memU with the same operands.
 9800   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9801 
 9802   ins_cost(200);
 9803   expand %{
 9804     cmovL_memU(cop, cr, dst, src);
 9805   %}
 9806 %}
 9807 
 9808 instruct cmovL_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL dst, memory src) %{
        // Memory-operand CMoveL for rFlagsRegUCFE/cmpOpUCFE operands: dst is both the result
        // and the first value input, and the LoadL is folded into cmovq as an Address.
        // The format annotation reads "signed, unsigned" to agree with the other UCFE rules
        // (e.g. cmovL_imm_01UCFE, cmovL_regUCFE_ndd); it only affects the printed listing.
 9809   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9810 
 9811   ins_cost(200); // XXX
 9812   format %{ "cmovq$cop $dst, $src\t# signed, unsigned, long" %}
 9813   ins_encode %{
 9814     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9815   %}
 9816   ins_pipe(pipe_cmov_mem); // XXX
 9817 %}
 9818 
 9819 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9820 %{
        // CMoveF between XMM registers, encoded as a short branch around a register move
        // rather than a cmov: jccb on the inverted condition skips the movflt, so dst keeps
        // its value when the CMove condition is false.
 9821   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9822 
 9823   ins_cost(200); // XXX
 9824   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9825             "movss     $dst, $src\n"
 9826     "skip:" %}
 9827   ins_encode %{
 9828     Label Lskip;
 9829     // Invert sense of branch from sense of CMOV
          // (the inversion is done by flipping the low bit of the condition code: cmpcode^1)
 9830     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9831     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9832     __ bind(Lskip);
 9833   %}
 9834   ins_pipe(pipe_slow);
 9835 %}
 9836 
 9837 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9838 %{
        // CMoveF for rFlagsRegU/cmpOpU operands, encoded as a short branch around a movflt
        // (same scheme as cmovF_reg). Also the expansion target of cmovF_regUCF, which
        // passes (cop, cr, dst, src) in this operand order.
 9839   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9840 
 9841   ins_cost(200); // XXX
 9842   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9843             "movss     $dst, $src\n"
 9844     "skip:" %}
 9845   ins_encode %{
 9846     Label Lskip;
 9847     // Invert sense of branch from sense of CMOV
          // (the inversion is done by flipping the low bit of the condition code: cmpcode^1)
 9848     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9849     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9850     __ bind(Lskip);
 9851   %}
 9852   ins_pipe(pipe_slow);
 9853 %}
 9854 
 9855 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
        // CMoveF for rFlagsRegUCF/cmpOpUCF operands. It has no encoding of its own and
        // expands to cmovF_regU with the same operands.
 9856   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9857 
 9858   ins_cost(200);
 9859   expand %{
 9860     cmovF_regU(cop, cr, dst, src);
 9861   %}
 9862 %}
 9863 
 9864 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9865 %{
        // CMoveF for rFlagsRegUCFE/cmpOpUCFE operands, encoded as a short branch around a
        // movflt (same scheme as cmovF_reg).
 9866   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9867 
 9868   ins_cost(200); // XXX
 9869   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9870             "movss     $dst, $src\n"
 9871     "skip:" %}
 9872   ins_encode %{
 9873     Label Lskip;
 9874     // Invert sense of branch from sense of CMOV
          // (the inversion is done by flipping the low bit of the condition code: cmpcode^1)
 9875     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9876     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9877     __ bind(Lskip);
 9878   %}
 9879   ins_pipe(pipe_slow);
 9880 %}
 9881 
 9882 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9883 %{
        // CMoveD between XMM registers, encoded as a short branch around a register move
        // rather than a cmov: jccb on the inverted condition skips the movdbl, so dst keeps
        // its value when the CMove condition is false.
 9884   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9885 
 9886   ins_cost(200); // XXX
 9887   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9888             "movsd     $dst, $src\n"
 9889     "skip:" %}
 9890   ins_encode %{
 9891     Label Lskip;
 9892     // Invert sense of branch from sense of CMOV
          // (the inversion is done by flipping the low bit of the condition code: cmpcode^1)
 9893     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9894     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9895     __ bind(Lskip);
 9896   %}
 9897   ins_pipe(pipe_slow);
 9898 %}
 9899 
 9900 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9901 %{
        // CMoveD for rFlagsRegU/cmpOpU operands, encoded as a short branch around a movdbl
        // (same scheme as cmovD_reg). Also the expansion target of cmovD_regUCF, which
        // passes (cop, cr, dst, src) in this operand order.
 9902   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9903 
 9904   ins_cost(200); // XXX
 9905   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9906             "movsd     $dst, $src\n"
 9907     "skip:" %}
 9908   ins_encode %{
 9909     Label Lskip;
 9910     // Invert sense of branch from sense of CMOV
          // (the inversion is done by flipping the low bit of the condition code: cmpcode^1)
 9911     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9912     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9913     __ bind(Lskip);
 9914   %}
 9915   ins_pipe(pipe_slow);
 9916 %}
 9917 
 9918 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
        // CMoveD for rFlagsRegUCF/cmpOpUCF operands. It has no encoding of its own and
        // expands to cmovD_regU with the same operands.
 9919   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9920 
 9921   ins_cost(200);
 9922   expand %{
 9923     cmovD_regU(cop, cr, dst, src);
 9924   %}
 9925 %}
 9926 
 9927 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9928 %{
        // CMoveD for rFlagsRegUCFE/cmpOpUCFE operands, encoded as a short branch around a
        // movdbl (same scheme as cmovD_reg).
 9929   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9930 
 9931   ins_cost(200); // XXX
 9932   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9933             "movsd     $dst, $src\n"
 9934     "skip:" %}
 9935   ins_encode %{
 9936     Label Lskip;
 9937     // Invert sense of branch from sense of CMOV
          // (the inversion is done by flipping the low bit of the condition code: cmpcode^1)
 9938     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9939     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9940     __ bind(Lskip);
 9941   %}
 9942   ins_pipe(pipe_slow);
 9943 %}
 9944 
 9945 //----------Arithmetic Instructions--------------------------------------------
 9946 //----------Addition Instructions----------------------------------------------
 9947 
 9948 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9949 %{
        // Two-operand int add (dst = dst + src), selected when UseAPX is off. The flags
        // register is declared KILLed, and flag(...) lists the status flags the rule is
        // marked as setting. No ins_cost is given, so the default cost applies.
 9950   predicate(!UseAPX);
 9951   match(Set dst (AddI dst src));
 9952   effect(KILL cr);
 9953   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9954   format %{ "addl    $dst, $src\t# int" %}
 9955   ins_encode %{
 9956     __ addl($dst$$Register, $src$$Register);
 9957   %}
 9958   ins_pipe(ialu_reg_reg);
 9959 %}
 9960 
 9961 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9962 %{
        // Three-operand int add (dst = src1 + src2), selected when UseAPX is on. Besides
        // the status-flag markers, the rule is tagged Flag_ndd_demotable_opr1 and
        // Flag_ndd_demotable_opr2.
        // NOTE(review): presumably these let a later pass fall back to the two-operand
        // encoding when dst shares a register with src1 or src2 -- confirm.
 9963   predicate(UseAPX);
 9964   match(Set dst (AddI src1 src2));
 9965   effect(KILL cr);
 9966   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9967 
 9968   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9969   ins_encode %{
          // NOTE(review): the meaning of the trailing 'false' argument is defined by eaddl elsewhere.
 9970     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9971   %}
 9972   ins_pipe(ialu_reg_reg);
 9973 %}
 9974 
 9975 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9976 %{
        // Two-operand int add of an immediate (dst = dst + constant), selected when UseAPX
        // is off. The flags register is declared KILLed.
 9977   predicate(!UseAPX);
 9978   match(Set dst (AddI dst src));
 9979   effect(KILL cr);
 9980   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9981 
 9982   format %{ "addl    $dst, $src\t# int" %}
 9983   ins_encode %{
 9984     __ addl($dst$$Register, $src$$constant);
 9985   %}
 9986   ins_pipe( ialu_reg );
 9987 %}
 9988 
 9989 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9990 %{
        // Three-operand int add of a register and an immediate (dst = src1 + constant),
        // selected when UseAPX is on. Only Flag_ndd_demotable_opr1 is set, since src2 is
        // an immediate rather than a register.
 9991   predicate(UseAPX);
 9992   match(Set dst (AddI src1 src2));
 9993   effect(KILL cr);
 9994   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9995 
 9996   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9997   ins_encode %{
 9998     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9999   %}
10000   ins_pipe( ialu_reg );
10001 %}
10002 
10003 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10004 %{
        // Int add of a register and a LoadI folded in as a memory operand
        // (dst = dst + [src]). There is no UseAPX predicate on this rule.
10005   match(Set dst (AddI dst (LoadI src)));
10006   effect(KILL cr);
10007   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10008 
10009   ins_cost(150); // XXX
10010   format %{ "addl    $dst, $src\t# int" %}
10011   ins_encode %{
10012     __ addl($dst$$Register, $src$$Address);
10013   %}
10014   ins_pipe(ialu_reg_mem);
10015 %}
10016 
10017 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10018 %{
        // Read-modify-write int add on memory: matches a StoreI to dst of
        // (LoadI dst) + src and emits a single addl with a memory destination.
10019   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10020   effect(KILL cr);
10021   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10022 
10023   ins_cost(150); // XXX
10024   format %{ "addl    $dst, $src\t# int" %}
10025   ins_encode %{
10026     __ addl($dst$$Address, $src$$Register);
10027   %}
10028   ins_pipe(ialu_mem_reg);
10029 %}
10030 
10031 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10032 %{
        // Read-modify-write int add of an immediate on memory: matches a StoreI to dst of
        // (LoadI dst) + constant and emits a single addl with a memory destination.
        // Costed at 125, below the 150 of the register-source form addI_mem_rReg.
10033   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10034   effect(KILL cr);
10035   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10036 
10037 
10038   ins_cost(125); // XXX
10039   format %{ "addl    $dst, $src\t# int" %}
10040   ins_encode %{
10041     __ addl($dst$$Address, $src$$constant);
10042   %}
10043   ins_pipe(ialu_mem_imm);
10044 %}
10045 
10046 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10047 %{
        // Int increment: matches AddI of dst with an immI_1 operand and emits incrementl.
        // Selected only when UseIncDec is on and UseAPX is off. Unlike the addI rules, no
        // flag(...) list is declared here; cr is still KILLed.
10048   predicate(!UseAPX && UseIncDec);
10049   match(Set dst (AddI dst src));
10050   effect(KILL cr);
10051 
10052   format %{ "incl    $dst\t# int" %}
10053   ins_encode %{
10054     __ incrementl($dst$$Register);
10055   %}
10056   ins_pipe(ialu_reg);
10057 %}
10058 
10059 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10060 %{
        // Two-register int increment (dst = src + val, val being an immI_1 operand),
        // selected when both UseAPX and UseIncDec are on. Tagged Flag_ndd_demotable_opr1.
10061   predicate(UseAPX && UseIncDec);
10062   match(Set dst (AddI src val));
10063   effect(KILL cr);
10064   flag(PD::Flag_ndd_demotable_opr1);
10065 
10066   format %{ "eincl    $dst, $src\t# int ndd" %}
10067   ins_encode %{
10068     __ eincl($dst$$Register, $src$$Register, false);
10069   %}
10070   ins_pipe(ialu_reg);
10071 %}
10072 
10073 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10074 %{
        // Read-modify-write int increment on memory: matches a StoreI to dst of
        // (LoadI dst) + immI_1 and emits incrementl on the address. Requires UseIncDec;
        // there is no UseAPX predicate on this rule.
10075   predicate(UseIncDec);
10076   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10077   effect(KILL cr);
10078 
10079   ins_cost(125); // XXX
10080   format %{ "incl    $dst\t# int" %}
10081   ins_encode %{
10082     __ incrementl($dst$$Address);
10083   %}
10084   ins_pipe(ialu_mem_imm);
10085 %}
10086 
10087 // XXX why does that use AddI
10088 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10089 %{
        // Int decrement: matches AddI of dst with an immI_M1 operand (presumably the
        // constant -1, i.e. the decrement is seen as "add minus one" -- confirm against the
        // operand definition) and emits decrementl. Selected only when UseIncDec is on and
        // UseAPX is off.
10090   predicate(!UseAPX && UseIncDec);
10091   match(Set dst (AddI dst src));
10092   effect(KILL cr);
10093 
10094   format %{ "decl    $dst\t# int" %}
10095   ins_encode %{
10096     __ decrementl($dst$$Register);
10097   %}
10098   ins_pipe(ialu_reg);
10099 %}
10100 
10101 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10102 %{
        // Two-register int decrement (dst = src + val, val being an immI_M1 operand),
        // selected when both UseAPX and UseIncDec are on. Tagged Flag_ndd_demotable_opr1.
10103   predicate(UseAPX && UseIncDec);
10104   match(Set dst (AddI src val));
10105   effect(KILL cr);
10106   flag(PD::Flag_ndd_demotable_opr1);
10107 
10108   format %{ "edecl    $dst, $src\t# int ndd" %}
10109   ins_encode %{
10110     __ edecl($dst$$Register, $src$$Register, false);
10111   %}
10112   ins_pipe(ialu_reg);
10113 %}
10114 
10115 // XXX why does that use AddI
10116 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10117 %{
        // Read-modify-write int decrement on memory: matches a StoreI to dst of
        // (LoadI dst) + immI_M1 (presumably the constant -1 -- confirm against the operand
        // definition) and emits decrementl on the address. Requires UseIncDec; there is no
        // UseAPX predicate on this rule.
10118   predicate(UseIncDec);
10119   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10120   effect(KILL cr);
10121 
10122   ins_cost(125); // XXX
10123   format %{ "decl    $dst\t# int" %}
10124   ins_encode %{
10125     __ decrementl($dst$$Address);
10126   %}
10127   ins_pipe(ialu_mem_imm);
10128 %}
10129 
// Folds dst = (index << scale) + disp into a single leal with no base
// register. Selected only when VM_Version::supports_fast_2op_lea() holds.
// No flags effect is declared for this rule.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  match(Set dst (AddI (LShiftI index scale) disp));
  predicate(VM_Version::supports_fast_2op_lea());

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the addressing-mode scale.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address scaled_index(noreg, $index$$Register, factor, $disp$$constant);
    __ leal($dst$$Register, scaled_index);
  %}
  ins_pipe(ialu_reg_reg);
%}
10142 
// Folds dst = (base + index) + disp into a single leal with an unscaled
// index. Selected only when VM_Version::supports_fast_3op_lea() holds.
// No flags effect is declared for this rule.
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  match(Set dst (AddI (AddI base index) disp));
  predicate(VM_Version::supports_fast_3op_lea());

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    // Index is used unscaled (times_1).
    const Address base_index_disp($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
    __ leal($dst$$Register, base_index_disp);
  %}
  ins_pipe(ialu_reg_reg);
%}
10154 
// Folds dst = base + (index << scale) into a single leal without a
// displacement. Selected only when VM_Version::supports_fast_2op_lea() holds.
// NOTE(review): base is restricted to no_rbp_r13_RegI -- presumably because
// rbp/r13 as a base would force a displacement in the encoding; confirm.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  match(Set dst (AddI base (LShiftI index scale)));
  predicate(VM_Version::supports_fast_2op_lea());

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the addressing-mode scale.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address base_scaled_index($base$$Register, $index$$Register, factor);
    __ leal($dst$$Register, base_scaled_index);
  %}
  ins_pipe(ialu_reg_reg);
%}
10167 
// Folds dst = (base + (index << scale)) + disp into a single leal.
// Selected only when VM_Version::supports_fast_3op_lea() holds.
// No flags effect is declared for this rule.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
  predicate(VM_Version::supports_fast_3op_lea());

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the addressing-mode scale.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address full_address($base$$Register, $index$$Register, factor, $disp$$constant);
    __ leal($dst$$Register, full_address);
  %}
  ins_pipe(ialu_reg_reg);
%}
10180 
// Long add, two-operand form: dst = dst + src via addq. Selected when UseAPX
// is off. Kills the flags register and is tagged as setting the overflow,
// sign, zero, carry and parity flags.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10194 
// Long add, three-operand ndd form (UseAPX): dst = src1 + src2 via eaddq.
// Besides the condition-flag tags, both source operands are tagged
// ndd-demotable (Flag_ndd_demotable_opr1 and Flag_ndd_demotable_opr2).
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10208 
// Long add of an immL32 constant, two-operand form: dst = dst + src via addq.
// Selected when UseAPX is off. Kills the flags register and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10222 
// Long add of a register and an immL32 constant, ndd form (UseAPX):
// dst = src1 + src2 via eaddq. Only the register source is tagged
// ndd-demotable (Flag_ndd_demotable_opr1).
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10236 
// Long add with a folded load: dst = dst + LoadL(src) via addq reg, mem.
// No predicate is attached to this rule. Kills the flags register and is
// tagged as setting the overflow, sign, zero, carry and parity flags.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10250 
// Long read-modify-write add: matches a StoreL to $dst of (LoadL $dst) + $src
// and emits a single addq mem, reg. Kills the flags register and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10264 
// Long read-modify-write add of an immL32 constant: matches a StoreL to $dst
// of (LoadL $dst) + $src and emits a single addq mem, imm. Kills the flags
// register and is tagged as setting the overflow, sign, zero, carry and
// parity flags.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10278 
// Long increment in place: matches AddL of $dst and an immL1 operand and
// emits incrementq. Selected when UseIncDec is set and UseAPX is not; the
// flags register is declared killed.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10291 
// Long increment, ndd form: matches AddL of $src and an immL1 operand and
// emits eincq with distinct $dst and $src registers. Selected only when both
// UseAPX and UseIncDec are set. The flags register is declared killed and the
// first source operand is tagged Flag_ndd_demotable_opr1.
// $src is a long register operand (rRegL): it is the input of an AddL and is
// read by the 64-bit eincq, matching the sibling decL_rReg_ndd rule.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10305 
// Long increment of a memory location: matches a StoreL to $dst of
// (LoadL $dst) + immL1 and emits one incrementq on the address.
// Requires UseIncDec; the flags register is declared killed.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10319 
// XXX why does that use AddL
// NOTE(review): presumably because the ideal graph expresses "x - 1" as
// AddL(x, -1) -- confirm against the SubL idealization before dropping the XXX.
// Long decrement in place: matches AddL of $dst and an immL_M1 operand and
// emits decrementq. Selected when UseIncDec is set and UseAPX is not; the
// flags register is declared killed.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10333 
// Long decrement, ndd form: matches AddL of $src and an immL_M1 operand and
// emits edecq with distinct $dst and $src registers. Selected only when both
// UseAPX and UseIncDec are set. The flags register is declared killed and the
// first source operand is tagged Flag_ndd_demotable_opr1.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ edecq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10347 
// XXX why does that use AddL
// NOTE(review): presumably because the ideal graph expresses "x - 1" as
// AddL(x, -1) -- confirm against the SubL idealization before dropping the XXX.
// Long decrement of a memory location: matches a StoreL to $dst of
// (LoadL $dst) + immL_M1 and emits one decrementq on the address.
// Requires UseIncDec; the flags register is declared killed.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10362 
// Folds dst = (index << scale) + disp into a single leaq with no base
// register. Selected only when VM_Version::supports_fast_2op_lea() holds.
// No flags effect is declared for this rule.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
  match(Set dst (AddL (LShiftL index scale) disp));
  predicate(VM_Version::supports_fast_2op_lea());

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the addressing-mode scale.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address scaled_index(noreg, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, scaled_index);
  %}
  ins_pipe(ialu_reg_reg);
%}
10375 
// Folds dst = (base + index) + disp into a single leaq with an unscaled
// index. Selected only when VM_Version::supports_fast_3op_lea() holds.
// No flags effect is declared for this rule.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
%{
  match(Set dst (AddL (AddL base index) disp));
  predicate(VM_Version::supports_fast_3op_lea());

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    // Index is used unscaled (times_1).
    const Address base_index_disp($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
    __ leaq($dst$$Register, base_index_disp);
  %}
  ins_pipe(ialu_reg_reg);
%}
10387 
// Folds dst = base + (index << scale) into a single leaq without a
// displacement. Selected only when VM_Version::supports_fast_2op_lea() holds.
// NOTE(review): base is restricted to no_rbp_r13_RegL -- presumably because
// rbp/r13 as a base would force a displacement in the encoding; confirm.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
%{
  match(Set dst (AddL base (LShiftL index scale)));
  predicate(VM_Version::supports_fast_2op_lea());

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the addressing-mode scale.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address base_scaled_index($base$$Register, $index$$Register, factor);
    __ leaq($dst$$Register, base_scaled_index);
  %}
  ins_pipe(ialu_reg_reg);
%}
10400 
// Folds dst = (base + (index << scale)) + disp into a single leaq.
// Selected only when VM_Version::supports_fast_3op_lea() holds.
// No flags effect is declared for this rule.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
%{
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
  predicate(VM_Version::supports_fast_3op_lea());

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the addressing-mode scale.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address full_address($base$$Register, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, full_address);
  %}
  ins_pipe(ialu_reg_reg);
%}
10413 
// Pointer add: dst = dst + src (long offset register) via addq. No predicate
// is attached. Kills the flags register and is tagged as setting the
// overflow, sign, zero, carry and parity flags.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10426 
// Pointer add of an immL32 constant: dst = dst + src via addq reg, imm.
// No predicate is attached. Kills the flags register and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10439 
10440 // XXX addP mem ops ????
10441 
// CheckCastPP on a pointer register: declared with size(0) and an empty
// encoding, so the node occupies no code; input and output share $dst.
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10451 
// CastPP on a pointer register: declared with size(0) and an empty encoding,
// so the node occupies no code; input and output share $dst.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10461 
// CastII when VerifyConstraintCasts == 0: zero size, zero cost, empty
// encoding. The checked variant below is used when verification is enabled.
instruct castII(rRegI dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10473 
// CastII when VerifyConstraintCasts > 0: emits verify_int_in_range on $dst,
// passing this node's _idx and its int bottom type. The flags register is
// declared killed by the check.
instruct castII_checked(rRegI dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10486 
// CastLL when VerifyConstraintCasts == 0: zero size, zero cost, empty
// encoding. The checked variants below are used when verification is enabled.
instruct castLL(rRegL dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10498 
// CastLL when VerifyConstraintCasts > 0 and castLL_is_imm32(n) holds: emits
// verify_long_in_range on $dst with noreg as the scratch register argument,
// so no TEMP operand is reserved. The flags register is declared killed.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr);
  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10511 
// CastLL when VerifyConstraintCasts > 0 and castLL_is_imm32(n) does not hold:
// emits verify_long_in_range on $dst and hands it the TEMP register $tmp.
// The flags register is declared killed.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr, TEMP tmp);
  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10524 
// CastFF on a float register: zero size, zero cost, empty encoding.
instruct castFF(regF dst)
%{
  match(Set dst (CastFF dst));

  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10535 
// CastHH: zero size, zero cost, empty encoding. The operand is declared with
// the regF class, the same class castFF uses.
instruct castHH(regF dst)
%{
  match(Set dst (CastHH dst));

  size(0);
  format %{ "# castHH of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10546 
// CastDD on a double register: zero size, zero cost, empty encoding.
instruct castDD(regD dst)
%{
  match(Set dst (CastDD dst));

  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10557 
10558 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing a boolean-style int result. One rule
// serves both CompareAndSwapP and WeakCompareAndSwapP, and it applies only
// when the node's barrier_data() is 0. Emits lock + cmpxchgq followed by
// setcc(equal) into $res. $oldval is pinned to the rax operand class and is
// declared killed along with the flags register.
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10579 
// Long compare-and-swap producing an int success result. One rule serves
// both CompareAndSwapL and WeakCompareAndSwapL. Emits lock + cmpxchgq
// followed by setcc(equal) into $res. $oldval is pinned to the rax operand
// class and is declared killed along with the flags register.
instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10599 
// Int compare-and-swap producing an int success result. One rule serves both
// CompareAndSwapI and WeakCompareAndSwapI. Emits lock + cmpxchgl followed by
// setcc(equal) into $res. $oldval is pinned to the rax operand class and is
// declared killed along with the flags register.
instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10619 
// Byte compare-and-swap producing an int success result. One rule serves
// both CompareAndSwapB and WeakCompareAndSwapB. Emits lock + cmpxchgb
// followed by setcc(equal) into $res. $oldval is pinned to the rax operand
// class and is declared killed along with the flags register.
instruct compareAndSwapB(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10639 
// Short compare-and-swap producing an int success result. One rule serves
// both CompareAndSwapS and WeakCompareAndSwapS. Emits lock + cmpxchgw
// followed by setcc(equal) into $res. $oldval is pinned to the rax operand
// class and is declared killed along with the flags register.
instruct compareAndSwapS(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10659 
// Narrow-oop compare-and-swap producing an int success result. One rule
// serves both CompareAndSwapN and WeakCompareAndSwapN, and it applies only
// when the node's barrier_data() is 0. Emits lock + cmpxchgl (32-bit form)
// followed by setcc(equal) into $res. $oldval is pinned to the rax operand
// class and is declared killed along with the flags register.
instruct compareAndSwapN(rRegI res,
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10679 
// Byte compare-and-exchange: the node's result is $oldval itself (an
// rax-class operand), so only lock + cmpxchgb is emitted and no setcc
// follows. The flags register is declared killed.
instruct compareAndExchangeB(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10696 
// Short compare-and-exchange: the node's result is $oldval itself (an
// rax-class operand), so only lock + cmpxchgw is emitted and no setcc
// follows. The flags register is declared killed.
instruct compareAndExchangeS(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10713 
// Int compare-and-exchange: the node's result is $oldval itself (an
// rax-class operand), so only lock + cmpxchgl is emitted and no setcc
// follows. The flags register is declared killed.
instruct compareAndExchangeI(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10730 
// Long compare-and-exchange: the node's result is $oldval itself (an
// rax-class operand), so only lock + cmpxchgq is emitted and no setcc
// follows. The flags register is declared killed.
instruct compareAndExchangeL(
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10747 
// Narrow-oop compare-and-exchange, applied only when the node's
// barrier_data() is 0. The result is $oldval itself (an rax-class operand),
// so only lock + cmpxchgl is emitted. The flags register is declared killed.
instruct compareAndExchangeN(
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10764 
// Pointer compare-and-exchange, applied only when the node's barrier_data()
// is 0. The result is $oldval itself (an rax-class operand), so only
// lock + cmpxchgq is emitted. The flags register is declared killed.
instruct compareAndExchangeP(
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10782 
// GetAndAddB whose result is not used (result_not_used()): emits a locked
// addb of register $add to memory instead of an xadd. The Universe operand
// $dummy stands in as the set target. The flags register is declared killed.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10794 
// GetAndAddB with an immediate addend whose result is not used: emits a
// locked addb of the constant $add to memory. The Universe operand $dummy
// stands in as the set target. The flags register is declared killed.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10806 
// GetAndAddB whose result is used: emits lock + xaddb with $newval as both
// the addend and the returned value, then calls narrow_subword_type on
// $newval with T_BYTE. The flags register is declared killed.
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "xaddb_lock  $mem, $newval\t# $newval -> byte" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
    __ narrow_subword_type($newval$$Register, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10819 
// GetAndAddS whose result is not used (result_not_used()): emits a locked
// addw of register $add to memory instead of an xadd. The Universe operand
// $dummy stands in as the set target. The flags register is declared killed.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10831 
// GetAndAddS with an immediate addend whose result is not used: emits a
// locked addw of the constant $add to memory. Unlike the byte/int/long
// immediate variants, this rule is additionally gated on UseStoreImmI16.
// The flags register is declared killed.
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10843 
// GetAndAddS whose result is used: emits lock + xaddw with $newval as both
// the addend and the returned value, then calls narrow_subword_type on
// $newval with T_SHORT. The flags register is declared killed.
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "xaddw_lock  $mem, $newval\t# $newval -> short" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
    __ narrow_subword_type($newval$$Register, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10856 
// GetAndAddI whose result is not used (result_not_used()): emits a locked
// addl of register $add to memory instead of an xadd. The Universe operand
// $dummy stands in as the set target. The flags register is declared killed.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10868 
// GetAndAddI with an immediate addend whose result is not used: emits a
// locked addl of the constant $add to memory. The Universe operand $dummy
// stands in as the set target. The flags register is declared killed.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10880 
// GetAndAddI whose result is used: emits lock + xaddl with $newval as both
// the addend and the returned value. The flags register is declared killed.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "xaddl_lock  $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10892 
// GetAndAddL whose result is not used (result_not_used()): emits a locked
// addq of register $add to memory instead of an xadd. The Universe operand
// $dummy stands in as the set target. The flags register is declared killed.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10904 
// GetAndAddL with an immL32 addend whose result is not used: emits a locked
// addq of the constant $add to memory. The Universe operand $dummy stands in
// as the set target. The flags register is declared killed.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10916 
// GetAndAddL whose result is used: emits lock + xaddq with $newval as both
// the addend and the returned value. The flags register is declared killed.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);
  format %{ "xaddq_lock  $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddq($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10928 
// GetAndSetB: exchanges $newval with the byte at $mem via xchgb, then calls
// narrow_subword_type on $newval with T_BYTE. No flags effect is declared.
// NOTE(review): no explicit lock() is emitted -- presumably because xchg with
// a memory operand is implicitly locked; confirm against the ISA manual.
instruct xchgB( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB  $newval,[$mem]\t# $newval -> byte" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
    __ narrow_subword_type($newval$$Register, T_BYTE);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10938 
// GetAndSetS: exchanges $newval with the short at $mem via xchgw, then calls
// narrow_subword_type on $newval with T_SHORT. No flags effect is declared
// and no explicit lock() is emitted.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW  $newval,[$mem]\t# $newval -> short" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
    __ narrow_subword_type($newval$$Register, T_SHORT);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10948 
// GetAndSetI: exchanges $newval with the int at $mem via xchgl; $newval
// holds the result. No flags effect is declared and no explicit lock() is
// emitted.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10957 
// GetAndSetL: exchanges $newval with the long at $mem via xchgq; $newval
// holds the result. No flags effect is declared and no explicit lock() is
// emitted. The format string names the 64-bit mnemonic so that printed
// listings agree with the xchgq actually encoded.
instruct xchgL( memory mem, rRegL newval) %{
  match(Set newval (GetAndSetL mem newval));
  format %{ "XCHGQ  $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10966 
// GetAndSetP: exchanges $newval with the pointer at $mem via xchgq; $newval
// holds the result. Applies only when the node's barrier_data() is 0.
// No flags effect is declared and no explicit lock() is emitted.
instruct xchgP( memory mem, rRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  format %{ "XCHGQ  $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10976 
// GetAndSetN: exchanges $newval with the narrow oop at $mem via xchgl (32-bit
// form); $newval holds the result. Applies only when the node's
// barrier_data() is 0. No flags effect is declared and no explicit lock() is
// emitted. The format string brackets the memory operand like the other
// xchg rules.
instruct xchgN( memory mem, rRegN newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetN mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10986 
10987 //----------Abs Instructions-------------------------------------------
10988 
10989 // Integer Absolute Instructions
// Int absolute value without a branch: clears $dst, subtracts $src from it
// (giving 0 - src), then conditionally moves $src into $dst when the
// subtraction left the "less" condition set. $dst is declared TEMP, so it is
// written before $src is last read; the flags register is declared killed.
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl    $dst, $dst\t# abs int\n\t"
            "subl    $dst, $src\n\t"
            "cmovll  $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subl($dst$$Register, $src$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
11005 
11006 // Long Absolute Instructions
// Long absolute value without a branch: clears $dst, subtracts $src from it
// with subq (giving 0 - src), then conditionally moves $src into $dst with
// cmovq when the subtraction left the "less" condition set. $dst is declared
// TEMP; the flags register is declared killed.
// NOTE(review): the clear uses the 32-bit xorl -- presumably relying on
// 32-bit writes zero-extending into the full 64-bit register; confirm.
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl    $dst, $dst\t# abs long\n\t"
            "subq    $dst, $src\n\t"
            "cmovlq  $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subq($dst$$Register, $src$$Register);
    __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
11022 
11023 //----------Subtraction Instructions-------------------------------------------
11024 
11025 // Integer Subtraction Instructions
// 32-bit register subtract, two-operand form: dst = dst - src via subl.
// Selected only when UseAPX is off. The flags register is killed, and the
// flag(...) list advertises the overflow/sign/zero/carry/parity markers.
11026 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11027 %{
11028   predicate(!UseAPX);
11029   match(Set dst (SubI dst src));
11030   effect(KILL cr);
11031   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11032 
11033   format %{ "subl    $dst, $src\t# int" %}
11034   ins_encode %{
11035     __ subl($dst$$Register, $src$$Register);
11036   %}
11037   ins_pipe(ialu_reg_reg);
11038 %}
11039 
// 32-bit register subtract, three-operand (ndd) form: dst = src1 - src2 via
// esubl. Selected only when UseAPX is on. Besides the condition-flag markers
// it carries PD::Flag_ndd_demotable_opr1.
// NOTE(review): the trailing `false` passed to esubl is presumably its
// no-flags selector -- confirm against the assembler declaration.
11040 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11041 %{
11042   predicate(UseAPX);
11043   match(Set dst (SubI src1 src2));
11044   effect(KILL cr);
11045   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11046 
11047   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11048   ins_encode %{
11049     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11050   %}
11051   ins_pipe(ialu_reg_reg);
11052 %}
11053 
// 32-bit subtract of an immediate, three-operand (ndd) form:
// dst = src1 - src2 where src2 is an immI constant, emitted via esubl.
// Selected only when UseAPX is on; the flags register is killed.
11054 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11055 %{
11056   predicate(UseAPX);
11057   match(Set dst (SubI src1 src2));
11058   effect(KILL cr);
11059   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11060 
11061   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11062   ins_encode %{
11063     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11064   %}
11065   ins_pipe(ialu_reg_reg);
11066 %}
11067 
// 32-bit subtract with a memory source: dst = dst - LoadI(src), with the load
// folded into subl. It has no UseAPX predicate, so it is not restricted to
// either mode. Cost 150; the flags register is killed.
11068 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11069 %{
11070   match(Set dst (SubI dst (LoadI src)));
11071   effect(KILL cr);
11072   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11073 
11074   ins_cost(150);
11075   format %{ "subl    $dst, $src\t# int" %}
11076   ins_encode %{
11077     __ subl($dst$$Register, $src$$Address);
11078   %}
11079   ins_pipe(ialu_reg_mem);
11080 %}
11081 
// 32-bit read-modify-write subtract: matches StoreI(dst, LoadI(dst) - src)
// on the same memory operand and emits a single subl with a memory
// destination. Cost 150; the flags register is killed.
11082 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11083 %{
11084   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11085   effect(KILL cr);
11086   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11087 
11088   ins_cost(150);
11089   format %{ "subl    $dst, $src\t# int" %}
11090   ins_encode %{
11091     __ subl($dst$$Address, $src$$Register);
11092   %}
11093   ins_pipe(ialu_mem_reg);
11094 %}
11095 
// 64-bit register subtract, two-operand form: dst = dst - src via subq.
// Selected only when UseAPX is off; the flags register is killed.
11096 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11097 %{
11098   predicate(!UseAPX);
11099   match(Set dst (SubL dst src));
11100   effect(KILL cr);
11101   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11102 
11103   format %{ "subq    $dst, $src\t# long" %}
11104   ins_encode %{
11105     __ subq($dst$$Register, $src$$Register);
11106   %}
11107   ins_pipe(ialu_reg_reg);
11108 %}
11109 
// 64-bit register subtract, three-operand (ndd) form: dst = src1 - src2 via
// esubq. Selected only when UseAPX is on; carries PD::Flag_ndd_demotable_opr1
// in addition to the condition-flag markers.
11110 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11111 %{
11112   predicate(UseAPX);
11113   match(Set dst (SubL src1 src2));
11114   effect(KILL cr);
11115   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11116 
11117   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11118   ins_encode %{
11119     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11120   %}
11121   ins_pipe(ialu_reg_reg);
11122 %}
11123 
// 64-bit subtract of an immediate, three-operand (ndd) form:
// dst = src1 - src2 where src2 is an immL32 constant, emitted via esubq.
// Selected only when UseAPX is on; the flags register is killed.
11124 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11125 %{
11126   predicate(UseAPX);
11127   match(Set dst (SubL src1 src2));
11128   effect(KILL cr);
11129   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11130 
11131   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11132   ins_encode %{
11133     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11134   %}
11135   ins_pipe(ialu_reg_reg);
11136 %}
11137 
// 64-bit subtract with a memory source: dst = dst - LoadL(src), with the load
// folded into subq. No UseAPX predicate. Cost 150; the flags register is killed.
11138 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11139 %{
11140   match(Set dst (SubL dst (LoadL src)));
11141   effect(KILL cr);
11142   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11143 
11144   ins_cost(150);
11145   format %{ "subq    $dst, $src\t# long" %}
11146   ins_encode %{
11147     __ subq($dst$$Register, $src$$Address);
11148   %}
11149   ins_pipe(ialu_reg_mem);
11150 %}
11151 
// 64-bit read-modify-write subtract: matches StoreL(dst, LoadL(dst) - src)
// on the same memory operand and emits a single subq with a memory
// destination. Cost 150; the flags register is killed.
11152 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11153 %{
11154   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11155   effect(KILL cr);
11156   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11157 
11158   ins_cost(150);
11159   format %{ "subq    $dst, $src\t# long" %}
11160   ins_encode %{
11161     __ subq($dst$$Address, $src$$Register);
11162   %}
11163   ins_pipe(ialu_mem_reg);
11164 %}
11165 
11166 // Subtract an int from a pointer: matches AddP(dst, 0 - src) and emits subq.
11167 // XXX NOTE(review): the original author flagged this rule as questionable.
// src is an rRegI yet the encoder passes it to the 64-bit subq; confirm that
// the rule is still reachable and that the operand width is intended.
// Unlike the other sub rules it declares no flag(...) list.
11168 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11169 %{
11170   match(Set dst (AddP dst (SubI zero src)));
11171   effect(KILL cr);
11172 
11173   format %{ "subq    $dst, $src\t# ptr - int" %}
11174   ins_encode %{
11175     __ subq($dst$$Register, $src$$Register);
11176   %}
11177   ins_pipe(ialu_reg_reg);
11178 %}
11179 
// 32-bit negate expressed as 0 - dst: matches SubI(zero, dst) with an immI_0
// operand and emits an in-place negl. Selected only when UseAPX is off.
// Its flag(...) list has no Flag_sets_carry_flag entry, unlike the sub rules.
11180 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11181 %{
11182   predicate(!UseAPX);
11183   match(Set dst (SubI zero dst));
11184   effect(KILL cr);
11185   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11186 
11187   format %{ "negl    $dst\t# int" %}
11188   ins_encode %{
11189     __ negl($dst$$Register);
11190   %}
11191   ins_pipe(ialu_reg);
11192 %}
11193 
// 32-bit negate expressed as 0 - src, two-register (ndd) form: matches
// SubI(zero, src) and emits enegl dst, src. Selected only when UseAPX is on.
// Carries PD::Flag_ndd_demotable_opr2 -- presumably because src is the second
// input of the match tree (NOTE(review): confirm).
11194 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11195 %{
11196   predicate(UseAPX);
11197   match(Set dst (SubI zero src));
11198   effect(KILL cr);
11199   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11200 
11201   format %{ "enegl    $dst, $src\t# int ndd" %}
11202   ins_encode %{
11203     __ enegl($dst$$Register, $src$$Register, false);
11204   %}
11205   ins_pipe(ialu_reg);
11206 %}
11207 
// 32-bit negate for the explicit NegI node (as opposed to the SubI(0, x)
// shape handled by negI_rReg): emits an in-place negl. UseAPX off only.
11208 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11209 %{
11210   predicate(!UseAPX);
11211   match(Set dst (NegI dst));
11212   effect(KILL cr);
11213   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11214 
11215   format %{ "negl    $dst\t# int" %}
11216   ins_encode %{
11217     __ negl($dst$$Register);
11218   %}
11219   ins_pipe(ialu_reg);
11220 %}
11221 
// 32-bit negate for the explicit NegI node, two-register (ndd) form:
// dst = -src via enegl. Selected only when UseAPX is on; carries
// PD::Flag_ndd_demotable_opr1.
11222 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11223 %{
11224   predicate(UseAPX);
11225   match(Set dst (NegI src));
11226   effect(KILL cr);
11227   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11228 
11229   format %{ "enegl    $dst, $src\t# int ndd" %}
11230   ins_encode %{
11231     __ enegl($dst$$Register, $src$$Register, false);
11232   %}
11233   ins_pipe(ialu_reg);
11234 %}
11235 
// 32-bit read-modify-write negate: matches StoreI(dst, 0 - LoadI(dst)) on the
// same memory operand and emits negl on the address. No UseAPX predicate.
// NOTE(review): the pipeline class is ialu_reg although the operand is
// memory -- confirm this is intended.
11236 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11237 %{
11238   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11239   effect(KILL cr);
11240   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11241 
11242   format %{ "negl    $dst\t# int" %}
11243   ins_encode %{
11244     __ negl($dst$$Address);
11245   %}
11246   ins_pipe(ialu_reg);
11247 %}
11248 
// 64-bit negate expressed as 0 - dst: matches SubL(zero, dst) with an immL0
// operand and emits an in-place negq. Selected only when UseAPX is off.
11249 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11250 %{
11251   predicate(!UseAPX);
11252   match(Set dst (SubL zero dst));
11253   effect(KILL cr);
11254   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11255 
11256   format %{ "negq    $dst\t# long" %}
11257   ins_encode %{
11258     __ negq($dst$$Register);
11259   %}
11260   ins_pipe(ialu_reg);
11261 %}
11262 
// 64-bit negate expressed as 0 - src, two-register (ndd) form: matches
// SubL(zero, src) and emits enegq dst, src. Selected only when UseAPX is on;
// carries PD::Flag_ndd_demotable_opr2.
11263 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11264 %{
11265   predicate(UseAPX);
11266   match(Set dst (SubL zero src));
11267   effect(KILL cr);
11268   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11269 
11270   format %{ "enegq    $dst, $src\t# long ndd" %}
11271   ins_encode %{
11272     __ enegq($dst$$Register, $src$$Register, false);
11273   %}
11274   ins_pipe(ialu_reg);
11275 %}
11276 
// 64-bit negate for the explicit NegL node (as opposed to the SubL(0, x)
// shape handled by negL_rReg): emits an in-place negq. UseAPX off only.
11277 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11278 %{
11279   predicate(!UseAPX);
11280   match(Set dst (NegL dst));
11281   effect(KILL cr);
11282   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11283 
// Disassembly text only: this is the 64-bit rule, so it is tagged "long"
// like negL_rReg and negL_mem.
11284   format %{ "negq    $dst\t# long" %}
11285   ins_encode %{
11286     __ negq($dst$$Register);
11287   %}
11288   ins_pipe(ialu_reg);
11289 %}
11290 
// 64-bit negate for the explicit NegL node, two-register (ndd) form:
// dst = -src via enegq. Selected only when UseAPX is on; carries
// PD::Flag_ndd_demotable_opr1.
11291 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11292 %{
11293   predicate(UseAPX);
11294   match(Set dst (NegL src));
11295   effect(KILL cr);
11296   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11297 
11298   format %{ "enegq    $dst, $src\t# long ndd" %}
11299   ins_encode %{
11300     __ enegq($dst$$Register, $src$$Register, false);
11301   %}
11302   ins_pipe(ialu_reg);
11303 %}
11304 
// 64-bit read-modify-write negate: matches StoreL(dst, 0 - LoadL(dst)) on the
// same memory operand and emits negq on the address. No UseAPX predicate.
// NOTE(review): the pipeline class is ialu_reg although the operand is
// memory -- confirm this is intended.
11305 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11306 %{
11307   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11308   effect(KILL cr);
11309   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11310 
11311   format %{ "negq    $dst\t# long" %}
11312   ins_encode %{
11313     __ negq($dst$$Address);
11314   %}
11315   ins_pipe(ialu_reg);
11316 %}
11317 
11318 //----------Multiplication/Division Instructions-------------------------------
11319 // Integer Multiplication Instructions
11320 // Multiply Register
11321 
// 32-bit register multiply, two-operand form: dst = dst * src via imull.
// Selected only when UseAPX is off. Cost 300; the flags register is killed.
// This rule is also reused by the expand block of mulAddS2I_rReg.
11322 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11323 %{
11324   predicate(!UseAPX);
11325   match(Set dst (MulI dst src));
11326   effect(KILL cr);
11327 
11328   ins_cost(300);
11329   format %{ "imull   $dst, $src\t# int" %}
11330   ins_encode %{
11331     __ imull($dst$$Register, $src$$Register);
11332   %}
11333   ins_pipe(ialu_reg_reg_alu0);
11334 %}
11335 
// 32-bit register multiply, three-operand (ndd) form: dst = src1 * src2 via
// eimull. Selected only when UseAPX is on. Both inputs are marked demotable
// (Flag_ndd_demotable_opr1 and _opr2). Cost 300; the flags register is killed.
11336 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11337 %{
11338   predicate(UseAPX);
11339   match(Set dst (MulI src1 src2));
11340   effect(KILL cr);
11341   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11342 
11343   ins_cost(300);
11344   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11345   ins_encode %{
11346     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11347   %}
11348   ins_pipe(ialu_reg_reg_alu0);
11349 %}
11350 
// 32-bit multiply by an immediate: dst = src * imm using the three-operand
// imull(dst, src, constant) form. It has no UseAPX predicate. Cost 300; the
// flags register is killed.
11351 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11352 %{
11353   match(Set dst (MulI src imm));
11354   effect(KILL cr);
11355 
11356   ins_cost(300);
11357   format %{ "imull   $dst, $src, $imm\t# int" %}
11358   ins_encode %{
11359     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11360   %}
11361   ins_pipe(ialu_reg_reg_alu0);
11362 %}
11363 
// 32-bit multiply with a memory source: dst = dst * LoadI(src), with the load
// folded into imull. Cost 350; the flags register is killed.
11364 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11365 %{
11366   match(Set dst (MulI dst (LoadI src)));
11367   effect(KILL cr);
11368 
11369   ins_cost(350);
11370   format %{ "imull   $dst, $src\t# int" %}
11371   ins_encode %{
11372     __ imull($dst$$Register, $src$$Address);
11373   %}
11374   ins_pipe(ialu_reg_mem_alu0);
11375 %}
11376 
// 32-bit multiply of a loaded value by an immediate: dst = LoadI(src) * imm
// using imull(dst, address, constant). The flags register is killed.
// NOTE(review): cost is 300 here versus 350 for mulI_mem -- confirm the
// difference is intentional.
11377 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11378 %{
11379   match(Set dst (MulI (LoadI src) imm));
11380   effect(KILL cr);
11381 
11382   ins_cost(300);
11383   format %{ "imull   $dst, $src, $imm\t# int" %}
11384   ins_encode %{
11385     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11386   %}
11387   ins_pipe(ialu_reg_mem_alu0);
11388 %}
11389 
// MulAddS2I over the pairs (dst, src1) and (src2, src3). There is no encoding
// of its own: the rule expands into three existing rules, in order:
//   mulI_rReg(dst, src1)   -- dst  = dst  * src1
//   mulI_rReg(src2, src3)  -- src2 = src2 * src3
//   addI_rReg(dst, src2)   -- combines the two products into dst
// src2 is overwritten by the second step, which is why it is declared KILL.
11390 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11391 %{
11392   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11393   effect(KILL cr, KILL src2);
11394 
11395   expand %{ mulI_rReg(dst, src1, cr);
11396            mulI_rReg(src2, src3, cr);
11397            addI_rReg(dst, src2, cr); %}
11398 %}
11399 
// 64-bit register multiply, two-operand form: dst = dst * src via imulq.
// Selected only when UseAPX is off. Cost 300; the flags register is killed.
11400 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11401 %{
11402   predicate(!UseAPX);
11403   match(Set dst (MulL dst src));
11404   effect(KILL cr);
11405 
11406   ins_cost(300);
11407   format %{ "imulq   $dst, $src\t# long" %}
11408   ins_encode %{
11409     __ imulq($dst$$Register, $src$$Register);
11410   %}
11411   ins_pipe(ialu_reg_reg_alu0);
11412 %}
11413 
// 64-bit register multiply, three-operand (ndd) form: dst = src1 * src2 via
// eimulq. Selected only when UseAPX is on. Both inputs are marked demotable
// (Flag_ndd_demotable_opr1 and _opr2). Cost 300; the flags register is killed.
11414 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11415 %{
11416   predicate(UseAPX);
11417   match(Set dst (MulL src1 src2));
11418   effect(KILL cr);
11419   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11420 
11421   ins_cost(300);
11422   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11423   ins_encode %{
11424     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11425   %}
11426   ins_pipe(ialu_reg_reg_alu0);
11427 %}
11428 
// 64-bit multiply by an immediate: dst = src * imm where imm is an immL32
// operand, using the three-operand imulq(dst, src, constant) form. It has no
// UseAPX predicate. Cost 300; the flags register is killed.
11429 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11430 %{
11431   match(Set dst (MulL src imm));
11432   effect(KILL cr);
11433 
11434   ins_cost(300);
11435   format %{ "imulq   $dst, $src, $imm\t# long" %}
11436   ins_encode %{
11437     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11438   %}
11439   ins_pipe(ialu_reg_reg_alu0);
11440 %}
11441 
// 64-bit multiply with a memory source: dst = dst * LoadL(src), with the load
// folded into imulq. Cost 350; the flags register is killed.
11442 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11443 %{
11444   match(Set dst (MulL dst (LoadL src)));
11445   effect(KILL cr);
11446 
11447   ins_cost(350);
11448   format %{ "imulq   $dst, $src\t# long" %}
11449   ins_encode %{
11450     __ imulq($dst$$Register, $src$$Address);
11451   %}
11452   ins_pipe(ialu_reg_mem_alu0);
11453 %}
11454 
11455 
// 64-bit multiply of a loaded value by an immediate: dst = LoadL(src) * imm
// using imulq(dst, address, constant). The flags register is killed.
// NOTE(review): cost is 300 here versus 350 for mulL_mem -- confirm the
// difference is intentional.
11456 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11457 %{
11458   match(Set dst (MulL (LoadL src) imm));
11459   effect(KILL cr);
11460 
11461   ins_cost(300);
11462   format %{ "imulq   $dst, $src, $imm\t# long" %}
11463   ins_encode %{
11464     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11465   %}
11466   ins_pipe(ialu_reg_mem_alu0);
11467 %}
11468 
// MulHiLoL with one input pinned to the rax_RegL operand; two match rules
// accept either operand order. The match has no Set: rax and rdx are both
// listed as operands. The encoder emits the one-operand imulq($src); per the
// format text the product is RDX:RAX. Cost 300; the flags register is killed.
11469 instruct mulHiLoL_rReg(rax_RegL rax, rdx_RegL rdx, rRegL src, rFlagsReg cr)
11470 %{
11471   match(MulHiLoL src rax);
11472   match(MulHiLoL rax src);
11473   effect(KILL cr);
11474 
11475   ins_cost(300);
11476   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhilo" %}
11477   ins_encode %{
11478     __ imulq($src$$Register);
11479   %}
11480   ins_pipe(ialu_reg_reg_alu0);
11481 %}
11482 
// UMulHiLoL with one input pinned to the rax_RegL operand; two match rules
// accept either operand order. Same shape as mulHiLoL_rReg, but the encoder
// emits the one-operand mulq($src). Cost 300; the flags register is killed.
11483 instruct umulHiLoL_rReg(rax_RegL rax, rdx_RegL rdx, rRegL src, rFlagsReg cr)
11484 %{
11485   match(UMulHiLoL src rax);
11486   match(UMulHiLoL rax src);
11487   effect(KILL cr);
11488 
11489   ins_cost(300);
11490   format %{ "mulq    RDX:RAX, RAX, $src\t# umulhilo" %}
11491   ins_encode %{
11492     __ mulq($src$$Register);
11493   %}
11494   ins_pipe(ialu_reg_reg_alu0);
11495 %}
11496 
// MulHiL: the result operand is of class rdx_RegL and one input is pinned to
// rax_RegL. rax is USE_KILL, i.e. consumed as an input and clobbered. The
// encoder emits the one-operand imulq($src). Cost 300; flags are killed.
11497 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11498 %{
11499   match(Set dst (MulHiL src rax));
11500   effect(USE_KILL rax, KILL cr);
11501 
11502   ins_cost(300);
11503   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11504   ins_encode %{
11505     __ imulq($src$$Register);
11506   %}
11507   ins_pipe(ialu_reg_reg_alu0);
11508 %}
11509 
// UMulHiL: same shape as mulHiL_rReg (result in the rdx_RegL operand, rax is
// USE_KILL) but the encoder emits the one-operand mulq($src).
// Cost 300; the flags register is killed.
11510 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11511 %{
11512   match(Set dst (UMulHiL src rax));
11513   effect(USE_KILL rax, KILL cr);
11514 
11515   ins_cost(300);
11516   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11517   ins_encode %{
11518     __ mulq($src$$Register);
11519   %}
11520   ins_pipe(ialu_reg_reg_alu0);
11521 %}
11522 
// Signed 32-bit divide: rax = rax / div. The divisor is restricted to the
// no_rax_rdx_RegI class; rdx and the flags register are killed.
// The encoding comes from cdql_enc, which is defined outside this chunk. Per
// the format text it special-cases rax == 0x80000000 with div == -1 (skipping
// the idivl) and otherwise runs cdql followed by idivl.
11523 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11524                    rFlagsReg cr)
11525 %{
11526   match(Set rax (DivI rax div));
11527   effect(KILL rdx, KILL cr);
11528 
11529   ins_cost(30*100+10*100); // XXX
11530   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11531             "jne,s   normal\n\t"
11532             "xorl    rdx, rdx\n\t"
11533             "cmpl    $div, -1\n\t"
11534             "je,s    done\n"
11535     "normal: cdql\n\t"
11536             "idivl   $div\n"
11537     "done:"        %}
11538   ins_encode(cdql_enc(div));
11539   ins_pipe(ialu_reg_reg_alu0);
11540 %}
11541 
// Signed 64-bit divide: rax = rax / div. The divisor is restricted to the
// no_rax_rdx_RegL class; rdx and the flags register are killed.
// The encoding comes from cdqq_enc, which is defined outside this chunk. Per
// the format text it special-cases rax == 0x8000000000000000 with div == -1
// (skipping the idivq) and otherwise runs cdqq followed by idivq.
11542 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11543                    rFlagsReg cr)
11544 %{
11545   match(Set rax (DivL rax div));
11546   effect(KILL rdx, KILL cr);
11547 
11548   ins_cost(30*100+10*100); // XXX
11549   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11550             "cmpq    rax, rdx\n\t"
11551             "jne,s   normal\n\t"
11552             "xorl    rdx, rdx\n\t"
11553             "cmpq    $div, -1\n\t"
11554             "je,s    done\n"
11555     "normal: cdqq\n\t"
11556             "idivq   $div\n"
11557     "done:"        %}
11558   ins_encode(cdqq_enc(div));
11559   ins_pipe(ialu_reg_reg_alu0);
11560 %}
11561 
// Unsigned 32-bit divide (UDivI): rax = rax / div. Code generation is
// delegated to the macro-assembler helper udivI(rax, div, rdx), whose body is
// not visible here. rdx and the flags register are killed. Cost 300.
11562 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11563 %{
11564   match(Set rax (UDivI rax div));
11565   effect(KILL rdx, KILL cr);
11566 
11567   ins_cost(300);
11568   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11569   ins_encode %{
11570     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11571   %}
11572   ins_pipe(ialu_reg_reg_alu0);
11573 %}
11574 
// Unsigned 64-bit divide (UDivL): rax = rax / div. Code generation is
// delegated to the macro-assembler helper udivL(rax, div, rdx), whose body is
// not visible here. rdx and the flags register are killed. Cost 300.
11575 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11576 %{
11577   match(Set rax (UDivL rax div));
11578   effect(KILL rdx, KILL cr);
11579 
11580   ins_cost(300);
11581   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11582   ins_encode %{
11583      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11584   %}
11585   ins_pipe(ialu_reg_reg_alu0);
11586 %}
11587 
11588 // Integer DIVMOD with Register, both quotient and mod results
// Matches DivModI (no Set in the match rule). It reuses the cdql_enc encoding
// of divI_rReg, but rdx is a plain operand here rather than a KILL effect;
// only the flags register is killed. Pipeline class is pipe_slow.
11589 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11590                              rFlagsReg cr)
11591 %{
11592   match(DivModI rax div);
11593   effect(KILL cr);
11594 
11595   ins_cost(30*100+10*100); // XXX
11596   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11597             "jne,s   normal\n\t"
11598             "xorl    rdx, rdx\n\t"
11599             "cmpl    $div, -1\n\t"
11600             "je,s    done\n"
11601     "normal: cdql\n\t"
11602             "idivl   $div\n"
11603     "done:"        %}
11604   ins_encode(cdql_enc(div));
11605   ins_pipe(pipe_slow);
11606 %}
11607 
11608 // Long DIVMOD with Register, both quotient and mod results
// Matches DivModL (no Set in the match rule). It reuses the cdqq_enc encoding
// of divL_rReg, but rdx is a plain operand here rather than a KILL effect;
// only the flags register is killed. Pipeline class is pipe_slow.
11609 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11610                              rFlagsReg cr)
11611 %{
11612   match(DivModL rax div);
11613   effect(KILL cr);
11614 
11615   ins_cost(30*100+10*100); // XXX
11616   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11617             "cmpq    rax, rdx\n\t"
11618             "jne,s   normal\n\t"
11619             "xorl    rdx, rdx\n\t"
11620             "cmpq    $div, -1\n\t"
11621             "je,s    done\n"
11622     "normal: cdqq\n\t"
11623             "idivq   $div\n"
11624     "done:"        %}
11625   ins_encode(cdqq_enc(div));
11626   ins_pipe(pipe_slow);
11627 %}
11628 
11629 // Unsigned integer DIVMOD with Register, both quotient and mod results
// Matches UDivModI (no Set in the match rule). Code generation is delegated
// to the macro-assembler helper udivmodI(rax, div, rdx, tmp). tmp is an extra
// TEMP register from the no_rax_rdx_RegI class; the flags register is killed.
11630 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11631                               no_rax_rdx_RegI div, rFlagsReg cr)
11632 %{
11633   match(UDivModI rax div);
11634   effect(TEMP tmp, KILL cr);
11635 
11636   ins_cost(300);
11637   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11638             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11639           %}
11640   ins_encode %{
11641     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11642   %}
11643   ins_pipe(pipe_slow);
11644 %}
11645 
11646 // Unsigned long DIVMOD with Register, both quotient and mod results
// Matches UDivModL (no Set in the match rule). Code generation is delegated
// to the macro-assembler helper udivmodL(rax, div, rdx, tmp). tmp is an extra
// TEMP register from the no_rax_rdx_RegL class; the flags register is killed.
11647 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11648                               no_rax_rdx_RegL div, rFlagsReg cr)
11649 %{
11650   match(UDivModL rax div);
11651   effect(TEMP tmp, KILL cr);
11652 
11653   ins_cost(300);
11654   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11655             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11656           %}
11657   ins_encode %{
11658     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11659   %}
11660   ins_pipe(pipe_slow);
11661 %}
11662 
// Signed 32-bit remainder: rdx = rax % div. It uses the same cdql_enc encoding
// as divI_rReg, with the result taken from the rdx operand; rax and the flags
// register are killed.
// NOTE(review): cost is 300 here versus 30*100+10*100 for divI_rReg although
// both use the same encoding -- both cost lines are already marked XXX.
11663 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11664                    rFlagsReg cr)
11665 %{
11666   match(Set rdx (ModI rax div));
11667   effect(KILL rax, KILL cr);
11668 
11669   ins_cost(300); // XXX
11670   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11671             "jne,s   normal\n\t"
11672             "xorl    rdx, rdx\n\t"
11673             "cmpl    $div, -1\n\t"
11674             "je,s    done\n"
11675     "normal: cdql\n\t"
11676             "idivl   $div\n"
11677     "done:"        %}
11678   ins_encode(cdql_enc(div));
11679   ins_pipe(ialu_reg_reg_alu0);
11680 %}
11681 
// Signed 64-bit remainder: rdx = rax % div. It uses the same cdqq_enc encoding
// as divL_rReg, with the result taken from the rdx operand; rax and the flags
// register are killed.
// NOTE(review): cost is 300 here versus 30*100+10*100 for divL_rReg although
// both use the same encoding -- both cost lines are already marked XXX.
11682 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11683                    rFlagsReg cr)
11684 %{
11685   match(Set rdx (ModL rax div));
11686   effect(KILL rax, KILL cr);
11687 
11688   ins_cost(300); // XXX
11689   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11690             "cmpq    rax, rdx\n\t"
11691             "jne,s   normal\n\t"
11692             "xorl    rdx, rdx\n\t"
11693             "cmpq    $div, -1\n\t"
11694             "je,s    done\n"
11695     "normal: cdqq\n\t"
11696             "idivq   $div\n"
11697     "done:"        %}
11698   ins_encode(cdqq_enc(div));
11699   ins_pipe(ialu_reg_reg_alu0);
11700 %}
11701 
// Unsigned 32-bit remainder (UModI): rdx = rax % div. Code generation is
// delegated to the macro-assembler helper umodI(rax, div, rdx), whose body is
// not visible here. rax and the flags register are killed. Cost 300.
11702 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11703 %{
11704   match(Set rdx (UModI rax div));
11705   effect(KILL rax, KILL cr);
11706 
11707   ins_cost(300);
11708   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11709   ins_encode %{
11710     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11711   %}
11712   ins_pipe(ialu_reg_reg_alu0);
11713 %}
11714 
// Unsigned 64-bit remainder (UModL): rdx = rax % div. Code generation is
// delegated to the macro-assembler helper umodL(rax, div, rdx), whose body is
// not visible here. rax and the flags register are killed. Cost 300.
11715 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11716 %{
11717   match(Set rdx (UModL rax div));
11718   effect(KILL rax, KILL cr);
11719 
11720   ins_cost(300);
11721   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11722   ins_encode %{
11723     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11724   %}
11725   ins_pipe(ialu_reg_reg_alu0);
11726 %}
11727 
11728 // Integer Shift Instructions
11729 // Shift Left by one, two, three
// 32-bit left shift by an immI2 constant, in place: dst = dst << shift via
// sall. Selected only when UseAPX is off; the flags register is killed.
11730 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11731 %{
11732   predicate(!UseAPX);
11733   match(Set dst (LShiftI dst shift));
11734   effect(KILL cr);
11735 
11736   format %{ "sall    $dst, $shift" %}
11737   ins_encode %{
11738     __ sall($dst$$Register, $shift$$constant);
11739   %}
11740   ins_pipe(ialu_reg);
11741 %}
11742 
11743 // Shift Left by one, two, three
// 32-bit left shift by an immI2 constant, separate-destination (ndd) form:
// dst = src << shift via esall. Selected only when UseAPX is on; carries
// PD::Flag_ndd_demotable_opr1. The flags register is killed.
11744 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11745 %{
11746   predicate(UseAPX);
11747   match(Set dst (LShiftI src shift));
11748   effect(KILL cr);
11749   flag(PD::Flag_ndd_demotable_opr1);
11750 
11751   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11752   ins_encode %{
11753     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11754   %}
11755   ins_pipe(ialu_reg);
11756 %}
11757 
11758 // Shift Left by 8-bit immediate
// 32-bit left shift by an immI8 constant, in place: dst = dst << shift via
// sall. Selected only when UseAPX is off; the flags register is killed.
11759 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11760 %{
11761   predicate(!UseAPX);
11762   match(Set dst (LShiftI dst shift));
11763   effect(KILL cr);
11764 
11765   format %{ "sall    $dst, $shift" %}
11766   ins_encode %{
11767     __ sall($dst$$Register, $shift$$constant);
11768   %}
11769   ins_pipe(ialu_reg);
11770 %}
11771 
11772 // Shift Left by 8-bit immediate
// 32-bit left shift by an immI8 constant, separate-destination (ndd) form:
// dst = src << shift via esall. Selected only when UseAPX is on; carries
// PD::Flag_ndd_demotable_opr1. The flags register is killed.
11773 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11774 %{
11775   predicate(UseAPX);
11776   match(Set dst (LShiftI src shift));
11777   effect(KILL cr);
11778   flag(PD::Flag_ndd_demotable_opr1);
11779 
11780   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11781   ins_encode %{
11782     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11783   %}
11784   ins_pipe(ialu_reg);
11785 %}
11786 
11787 // Shift Left by 8-bit immediate
// 32-bit read-modify-write left shift by an immI8 constant: matches
// StoreI(dst, LoadI(dst) << shift) on the same memory operand and emits sall
// on the address. No UseAPX predicate; the flags register is killed.
11788 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11789 %{
11790   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11791   effect(KILL cr);
11792 
11793   format %{ "sall    $dst, $shift" %}
11794   ins_encode %{
11795     __ sall($dst$$Address, $shift$$constant);
11796   %}
11797   ins_pipe(ialu_mem_imm);
11798 %}
11799 
11800 // Shift Left by variable
// 32-bit left shift by a variable count, in place. The count operand is
// restricted to the rcx_RegI class and the encoder passes only dst, so the
// count is implicit (the CL form, per the rule name). Selected only when BMI2
// is not supported; the flags register is killed.
11801 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11802 %{
11803   predicate(!VM_Version::supports_bmi2());
11804   match(Set dst (LShiftI dst shift));
11805   effect(KILL cr);
11806 
11807   format %{ "sall    $dst, $shift" %}
11808   ins_encode %{
11809     __ sall($dst$$Register);
11810   %}
11811   ins_pipe(ialu_reg_reg);
11812 %}
11813 
11814 // Shift Left by variable
// 32-bit read-modify-write left shift by a variable count: matches
// StoreI(dst, LoadI(dst) << shift) with the count restricted to rcx_RegI and
// emits sall on the address (the count is implicit). Selected only when BMI2
// is not supported; the flags register is killed.
11815 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11816 %{
11817   predicate(!VM_Version::supports_bmi2());
11818   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11819   effect(KILL cr);
11820 
11821   format %{ "sall    $dst, $shift" %}
11822   ins_encode %{
11823     __ sall($dst$$Address);
11824   %}
11825   ins_pipe(ialu_mem_reg);
11826 %}
11827 
// 32-bit left shift by a variable count using the three-operand shlxl:
// dst = src << shift, with the count in any rRegI. Selected only when BMI2 is
// supported. This rule has no rFlagsReg operand and declares no KILL effect.
11828 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11829 %{
11830   predicate(VM_Version::supports_bmi2());
11831   match(Set dst (LShiftI src shift));
11832 
11833   format %{ "shlxl   $dst, $src, $shift" %}
11834   ins_encode %{
11835     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11836   %}
11837   ins_pipe(ialu_reg_reg);
11838 %}
11839 
// 32-bit left shift of a loaded value by a variable count using shlxl with a
// memory source: dst = LoadI(src) << shift. Selected only when BMI2 is
// supported. Cost 175; no flags operand and no KILL effect.
11840 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11841 %{
11842   predicate(VM_Version::supports_bmi2());
11843   match(Set dst (LShiftI (LoadI src) shift));
11844   ins_cost(175);
11845   format %{ "shlxl   $dst, $src, $shift" %}
11846   ins_encode %{
11847     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11848   %}
11849   ins_pipe(ialu_reg_mem);
11850 %}
11851 
11852 // Arithmetic Shift Right by 8-bit immediate
// 32-bit arithmetic right shift by an immI8 constant, in place:
// dst = dst >> shift via sarl. Selected only when UseAPX is off; the flags
// register is killed.
// NOTE(review): the pipeline class is ialu_mem_imm although dst is a
// register; the sibling shrI_rReg_imm uses ialu_reg -- confirm intent.
11853 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11854 %{
11855   predicate(!UseAPX);
11856   match(Set dst (RShiftI dst shift));
11857   effect(KILL cr);
11858 
11859   format %{ "sarl    $dst, $shift" %}
11860   ins_encode %{
11861     __ sarl($dst$$Register, $shift$$constant);
11862   %}
11863   ins_pipe(ialu_mem_imm);
11864 %}
11865 
11866 // Arithmetic Shift Right by 8-bit immediate
// 32-bit arithmetic right shift by an immI8 constant, separate-destination
// (ndd) form: dst = src >> shift via esarl. Selected only when UseAPX is on;
// carries PD::Flag_ndd_demotable_opr1. The flags register is killed.
// NOTE(review): the pipeline class is ialu_mem_imm although both operands are
// registers; shrI_rReg_imm_ndd uses ialu_reg -- confirm intent.
11867 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11868 %{
11869   predicate(UseAPX);
11870   match(Set dst (RShiftI src shift));
11871   effect(KILL cr);
11872   flag(PD::Flag_ndd_demotable_opr1);
11873 
11874   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
11875   ins_encode %{
11876     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11877   %}
11878   ins_pipe(ialu_mem_imm);
11879 %}
11880 
11881 // Arithmetic Shift Right by 8-bit immediate
// 32-bit read-modify-write arithmetic right shift by an immI8 constant:
// matches StoreI(dst, LoadI(dst) >> shift) on the same memory operand and
// emits sarl on the address. No UseAPX predicate; flags are killed.
11882 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11883 %{
11884   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11885   effect(KILL cr);
11886 
11887   format %{ "sarl    $dst, $shift" %}
11888   ins_encode %{
11889     __ sarl($dst$$Address, $shift$$constant);
11890   %}
11891   ins_pipe(ialu_mem_imm);
11892 %}
11893 
11894 // Arithmetic Shift Right by variable
// 32-bit arithmetic right shift by a variable count, in place. The count
// operand is restricted to rcx_RegI and the encoder passes only dst, so the
// count is implicit (the CL form, per the rule name). Selected only when BMI2
// is not supported; the flags register is killed.
11895 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11896 %{
11897   predicate(!VM_Version::supports_bmi2());
11898   match(Set dst (RShiftI dst shift));
11899   effect(KILL cr);
11900 
11901   format %{ "sarl    $dst, $shift" %}
11902   ins_encode %{
11903     __ sarl($dst$$Register);
11904   %}
11905   ins_pipe(ialu_reg_reg);
11906 %}
11907 
11908 // Arithmetic Shift Right by variable
// 32-bit read-modify-write arithmetic right shift by a variable count:
// matches StoreI(dst, LoadI(dst) >> shift) with the count restricted to
// rcx_RegI and emits sarl on the address (the count is implicit). Selected
// only when BMI2 is not supported; the flags register is killed.
11909 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11910 %{
11911   predicate(!VM_Version::supports_bmi2());
11912   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11913   effect(KILL cr);
11914 
11915   format %{ "sarl    $dst, $shift" %}
11916   ins_encode %{
11917     __ sarl($dst$$Address);
11918   %}
11919   ins_pipe(ialu_mem_reg);
11920 %}
11921 
// 32-bit arithmetic right shift by a variable count using the three-operand
// sarxl: dst = src >> shift, with the count in any rRegI. Selected only when
// BMI2 is supported. No rFlagsReg operand and no KILL effect.
11922 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11923 %{
11924   predicate(VM_Version::supports_bmi2());
11925   match(Set dst (RShiftI src shift));
11926 
11927   format %{ "sarxl   $dst, $src, $shift" %}
11928   ins_encode %{
11929     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
11930   %}
11931   ins_pipe(ialu_reg_reg);
11932 %}
11933 
// 32-bit arithmetic right shift of a loaded value by a variable count using
// sarxl with a memory source: dst = LoadI(src) >> shift. Selected only when
// BMI2 is supported. Cost 175; no flags operand and no KILL effect.
11934 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
11935 %{
11936   predicate(VM_Version::supports_bmi2());
11937   match(Set dst (RShiftI (LoadI src) shift));
11938   ins_cost(175);
11939   format %{ "sarxl   $dst, $src, $shift" %}
11940   ins_encode %{
11941     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
11942   %}
11943   ins_pipe(ialu_reg_mem);
11944 %}
11945 
11946 // Logical Shift Right by 8-bit immediate
// 32-bit logical (unsigned) right shift by an immI8 constant, in place:
// matches URShiftI(dst, shift) and emits shrl. Selected only when UseAPX is
// off; the flags register is killed.
11947 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11948 %{
11949   predicate(!UseAPX);
11950   match(Set dst (URShiftI dst shift));
11951   effect(KILL cr);
11952 
11953   format %{ "shrl    $dst, $shift" %}
11954   ins_encode %{
11955     __ shrl($dst$$Register, $shift$$constant);
11956   %}
11957   ins_pipe(ialu_reg);
11958 %}
11959 
11960 // Logical Shift Right by 8-bit immediate
// 32-bit logical right shift by an immI8 constant, separate-destination (ndd)
// form: matches URShiftI(src, shift) and emits eshrl dst, src, shift.
// Selected only when UseAPX is on; carries PD::Flag_ndd_demotable_opr1.
// The flags register is killed.
11961 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11962 %{
11963   predicate(UseAPX);
11964   match(Set dst (URShiftI src shift));
11965   effect(KILL cr);
11966   flag(PD::Flag_ndd_demotable_opr1);
11967 
11968   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
11969   ins_encode %{
11970     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
11971   %}
11972   ins_pipe(ialu_reg);
11973 %}
11974 
11975 // Logical Shift Right by 8-bit immediate
// 32-bit read-modify-write logical right shift by an immI8 constant: matches
// StoreI(dst, URShiftI(LoadI(dst), shift)) on the same memory operand and
// emits shrl on the address. No UseAPX predicate; flags are killed.
11976 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11977 %{
11978   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
11979   effect(KILL cr);
11980 
11981   format %{ "shrl    $dst, $shift" %}
11982   ins_encode %{
11983     __ shrl($dst$$Address, $shift$$constant);
11984   %}
11985   ins_pipe(ialu_mem_imm);
11986 %}
11987 
11988 // Logical Shift Right by variable
// 32-bit logical right shift by a variable count, in place. The count operand
// is restricted to rcx_RegI and the encoder passes only dst, so the count is
// implicit (the CL form, per the rule name). Selected only when BMI2 is not
// supported; the flags register is killed.
11989 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11990 %{
11991   predicate(!VM_Version::supports_bmi2());
11992   match(Set dst (URShiftI dst shift));
11993   effect(KILL cr);
11994 
11995   format %{ "shrl    $dst, $shift" %}
11996   ins_encode %{
11997     __ shrl($dst$$Register);
11998   %}
11999   ins_pipe(ialu_reg_reg);
12000 %}
12001 
// Logical (unsigned) shift right of an int in memory by a variable count.
// Used when BMI2 is not available; the count operand is constrained to the
// rcx_RegI class and the load/shift/store is done as one memory-operand shrl.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ shrl(slot);
  %}
  ins_pipe(ialu_mem_reg);
%}
12015 
// Logical (unsigned) shift right of an int by a register count, BMI2 form.
// Three-operand shrxl: destination, source and count are independent
// registers and no flags operand is declared.
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    const Register count  = $shift$$Register;
    __ shrxl(result, input, count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12027 
// Logical (unsigned) shift right of an int loaded from memory by a register
// count, BMI2 form (shrxl with a memory source). Costed at 175.
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register count  = $shift$$Register;
    __ shrxl(result, $src$$Address, count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12039 
12040 // Long Shift Instructions
// Shift a long register left by a small constant (immI2 operand).
// In-place two-operand salq, used when APX is off; flags are declared killed.
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const int count = $shift$$constant;
    __ salq(target, count);
  %}
  ins_pipe(ialu_reg);
%}
12054 
// Shift a long left by a small constant (immI2 operand), APX variant with a
// separate destination register (three-operand esalq).
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    const int count = $shift$$constant;
    __ esalq(result, input, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12069 
// Shift a long register left by an immI8 constant.
// In-place two-operand salq, used when APX is off; flags are declared killed.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const int count = $shift$$constant;
    __ salq(target, count);
  %}
  ins_pipe(ialu_reg);
%}
12083 
// Shift a long left by an immI8 constant, APX variant with a separate
// destination register (three-operand esalq).
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    const int count = $shift$$constant;
    __ esalq(result, input, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12098 
// Shift a long in memory left by an immI8 constant.
// Matches load / shift / store-back on the same address and performs it as
// one read-modify-write salq.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    const int count = $shift$$constant;
    __ salq($dst$$Address, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12111 
// Shift a long register left by a variable count.
// Used when BMI2 is not available: the count operand is constrained to the
// rcx_RegI class and the one-operand salq call does not name it explicitly.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ salq(target);
  %}
  ins_pipe(ialu_reg_reg);
%}
12125 
// Shift a long in memory left by a variable count.
// Used when BMI2 is not available; the count operand is constrained to the
// rcx_RegI class and the load/shift/store is done as one memory-operand salq.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ salq(slot);
  %}
  ins_pipe(ialu_mem_reg);
%}
12139 
// Shift a long left by a register count, BMI2 form.
// Three-operand shlxq: destination, source and count are independent
// registers and no flags operand is declared.
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    const Register count  = $shift$$Register;
    __ shlxq(result, input, count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12151 
// Shift a long loaded from memory left by a register count, BMI2 form
// (shlxq with a memory source). Costed at 175.
instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register count  = $shift$$Register;
    __ shlxq(result, $src$$Address, count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12163 
// Arithmetic Shift Right of a long register by an int constant.
// The operand is a full immI (not immI8); the encoder reduces it to the low
// six bits (& 0x3F) before handing it to sarq. In-place two-operand form used
// when APX is off; the flags register is declared killed.
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
  %}
  // Register-only instruction: use the register pipeline class, matching the
  // sibling salL_rReg_imm and shrL_rReg_imm rules (was ialu_mem_imm).
  ins_pipe(ialu_reg);
%}
12177 
// Arithmetic Shift Right of a long by an int constant, APX variant with a
// separate destination register (three-operand esarq).
// The operand is a full immI (not immI8); the encoder reduces it to the low
// six bits (& 0x3F) before handing it to esarq.
instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  // Register-only instruction: use the register pipeline class, matching the
  // sibling salL_rReg_imm_ndd and shrL_rReg_imm_ndd rules (was ialu_mem_imm).
  ins_pipe(ialu_reg);
%}
12192 
// Arithmetic shift right of a long in memory by an int constant.
// The operand is a full immI; the encoder keeps only its low six bits
// (& 0x3F). Matches load / shift / store-back on the same address.
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    const unsigned char count = (unsigned char)($shift$$constant & 0x3F);
    __ sarq($dst$$Address, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12205 
// Arithmetic shift right of a long register by a variable count.
// Used when BMI2 is not available: the count operand is constrained to the
// rcx_RegI class and the one-operand sarq call does not name it explicitly.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ sarq(target);
  %}
  ins_pipe(ialu_reg_reg);
%}
12219 
// Arithmetic shift right of a long in memory by a variable count.
// Used when BMI2 is not available; the count operand is constrained to the
// rcx_RegI class and the load/shift/store is done as one memory-operand sarq.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ sarq(slot);
  %}
  ins_pipe(ialu_mem_reg);
%}
12233 
// Arithmetic shift right of a long by a register count, BMI2 form.
// Three-operand sarxq: destination, source and count are independent
// registers and no flags operand is declared.
instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL src shift));

  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    const Register count  = $shift$$Register;
    __ sarxq(result, input, count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12245 
// Arithmetic shift right of a long loaded from memory by a register count,
// BMI2 form (sarxq with a memory source). Costed at 175.
instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register count  = $shift$$Register;
    __ sarxq(result, $src$$Address, count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12257 
// Logical (unsigned) shift right of a long register by an immI8 constant.
// In-place two-operand shrq, used when APX is off; flags are declared killed.
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const int count = $shift$$constant;
    __ shrq(target, count);
  %}
  ins_pipe(ialu_reg);
%}
12271 
// Logical (unsigned) shift right of a long by an immI8 constant, APX variant
// with a separate destination register (three-operand eshrq).
instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    const int count = $shift$$constant;
    __ eshrq(result, input, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12286 
// Logical (unsigned) shift right of a long in memory by an immI8 constant.
// Matches load / shift / store-back on the same address and performs it as
// one read-modify-write shrq.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    const int count = $shift$$constant;
    __ shrq($dst$$Address, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12299 
// Logical (unsigned) shift right of a long register by a variable count.
// Used when BMI2 is not available: the count operand is constrained to the
// rcx_RegI class and the one-operand shrq call does not name it explicitly.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ shrq(target);
  %}
  ins_pipe(ialu_reg_reg);
%}
12313 
// Logical (unsigned) shift right of a long in memory by a variable count.
// Used when BMI2 is not available; the count operand is constrained to the
// rcx_RegI class and the load/shift/store is done as one memory-operand shrq.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ shrq(slot);
  %}
  ins_pipe(ialu_mem_reg);
%}
12327 
// Logical (unsigned) shift right of a long by a register count, BMI2 form.
// Three-operand shrxq: destination, source and count are independent
// registers and no flags operand is declared.
instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL src shift));

  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    const Register count  = $shift$$Register;
    __ shrxq(result, input, count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12339 
// Logical (unsigned) shift right of a long loaded from memory by a register
// count, BMI2 form (shrxq with a memory source). Costed at 175.
instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register count  = $shift$$Register;
    __ shrxq(result, $src$$Address, count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12351 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  // (src << 24) >> 24 with an arithmetic right shift collapses into a single
  // sign-extending byte move.
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ movsbl(result, input);
  %}
  ins_pipe(ialu_reg_reg);
%}
12364 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  // (src << 16) >> 16 with an arithmetic right shift collapses into a single
  // sign-extending 16-bit move.
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ movswl(result, input);
  %}
  ins_pipe(ialu_reg_reg);
%}
12377 
12378 // ROL/ROR instructions
12379 
// Rotate an int register left by an immI8 constant, non-BMI2 path.
// In-place roll; the flags register is declared killed. The predicate also
// restricts the rule to RotateLeft nodes whose result type is T_INT.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const int count = $shift$$constant;
    __ roll(target, count);
  %}
  ins_pipe(ialu_reg);
%}
12392 
// Rotate an int left by an immI8 constant using BMI2 (APX off).
// Encoded as a rotate right (rorxl) by 32 minus the count taken modulo 32.
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    const int left_count  = $shift$$constant & 31;
    const int right_count = 32 - left_count;
    __ rorxl($dst$$Register, $src$$Register, right_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12404 
// Rotate an int loaded from memory left by an immI8 constant using BMI2.
// Encoded as a rotate right (rorxl, memory source) by 32 minus the count
// taken modulo 32. Costed at 175.
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft (LoadI src) shift));
  ins_cost(175);
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    const int left_count  = $shift$$constant & 31;
    const int right_count = 32 - left_count;
    __ rorxl($dst$$Register, $src$$Address, right_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12417 
// Rotate an int register left by a variable count (APX off).
// The count operand is constrained to the rcx_RegI class; the one-operand
// roll call does not name it explicitly. Flags are declared killed.
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ roll(target);
  %}
  ins_pipe(ialu_reg_reg);
%}
12430 
// Rotate an int left by a variable count, APX variant with a separate
// destination register (eroll). The count operand is constrained to the
// rcx_RegI class and is not passed to the assembler call.
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ eroll(result, input, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12445 
// Rotate an int register right by an immI8 constant, non-BMI2 path.
// In-place rorl; the flags register is declared killed. The predicate also
// restricts the rule to RotateRight nodes whose result type is T_INT.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const int count = $shift$$constant;
    __ rorl(target, count);
  %}
  ins_pipe(ialu_reg);
%}
12458 
// Rotate an int right by an immI8 constant using BMI2 (APX off).
// Three-operand rorxl with the constant passed through unchanged.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    const int count = $shift$$constant;
    __ rorxl(result, input, count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12470 
// Rotate an int loaded from memory right by an immI8 constant using BMI2
// (rorxl with a memory source). Costed at 175.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight (LoadI src) shift));
  ins_cost(175);
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const int count = $shift$$constant;
    __ rorxl(result, $src$$Address, count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12482 
// Rotate an int register right by a variable count (APX off).
// The count operand is constrained to the rcx_RegI class; the one-operand
// rorl call does not name it explicitly. Flags are declared killed.
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ rorl(target);
  %}
  ins_pipe(ialu_reg_reg);
%}
12495 
// Rotate an int right by a variable count, APX variant with a separate
// destination register (erorl). The count operand is constrained to the
// rcx_RegI class and is not passed to the assembler call.
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ erorl(result, input, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12510 
// Rotate a long register left by an immI8 constant, non-BMI2 path.
// In-place rolq; the flags register is declared killed. The predicate also
// restricts the rule to RotateLeft nodes whose result type is T_LONG.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const int count = $shift$$constant;
    __ rolq(target, count);
  %}
  ins_pipe(ialu_reg);
%}
12523 
// Rotate a long left by an immI8 constant using BMI2 (APX off).
// Encoded as a rotate right (rorxq) by 64 minus the count taken modulo 64.
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    const int left_count  = $shift$$constant & 63;
    const int right_count = 64 - left_count;
    __ rorxq($dst$$Register, $src$$Register, right_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12535 
// Rotate a long loaded from memory left by an immI8 constant using BMI2.
// Encoded as a rotate right (rorxq, memory source) by 64 minus the count
// taken modulo 64. Costed at 175.
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft (LoadL src) shift));
  ins_cost(175);
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    const int left_count  = $shift$$constant & 63;
    const int right_count = 64 - left_count;
    __ rorxq($dst$$Register, $src$$Address, right_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12548 
// Rotate a long register left by a variable count (APX off).
// The count operand is constrained to the rcx_RegI class; the one-operand
// rolq call does not name it explicitly. Flags are declared killed.
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);

  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ rolq(target);
  %}
  ins_pipe(ialu_reg_reg);
%}
12562 
// Rotate a long left by a variable count, APX variant with a separate
// destination register (erolq). The count operand is constrained to the
// rcx_RegI class and is not passed to the assembler call.
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ erolq(result, input, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12577 
// Rotate a long register right by an immI8 constant, non-BMI2 path.
// In-place rorq; the flags register is declared killed. The predicate also
// restricts the rule to RotateRight nodes whose result type is T_LONG.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const int count = $shift$$constant;
    __ rorq(target, count);
  %}
  ins_pipe(ialu_reg);
%}
12590 
// Rotate Right by constant
// Rotates a long right by an immI8 constant using the three-operand BMI2
// rorxq; the rule declares no flags operand.
// NOTE(review): unlike rolI_immI8, rorI_immI8 and rolL_immI8 in this file,
// this predicate has no !UseAPX term, so the rule stays selectable when APX
// is enabled. Confirm whether that difference is intentional.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12602 
// Rotate a long loaded from memory right by an immI8 constant using BMI2
// (rorxq with a memory source). Costed at 175.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight (LoadL src) shift));
  ins_cost(175);
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const int count = $shift$$constant;
    __ rorxq(result, $src$$Address, count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12614 
// Rotate a long register right by a variable count (APX off).
// The count operand is constrained to the rcx_RegI class; the one-operand
// rorq call does not name it explicitly. Flags are declared killed.
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ rorq(target);
  %}
  ins_pipe(ialu_reg_reg);
%}
12627 
// Rotate a long right by a variable count, APX variant with a separate
// destination register (erorq). The count operand is constrained to the
// rcx_RegI class and is not passed to the assembler call.
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ erorq(result, input, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12642 
12643 //----------------------------- CompressBits/ExpandBits ------------------------
12644 
// CompressBits on long values with the mask in a register: emitted as pextq.
// The predicate limits the rule to nodes whose result type is long.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register input    = $src$$Register;
    const Register selector = $mask$$Register;
    __ pextq(result, input, selector);
  %}
  ins_pipe( pipe_slow );
%}
12654 
// ExpandBits on long values with the mask in a register: emitted as pdepq.
// The predicate limits the rule to nodes whose result type is long.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register input    = $src$$Register;
    const Register selector = $mask$$Register;
    __ pdepq(result, input, selector);
  %}
  ins_pipe( pipe_slow );
%}
12664 
// CompressBits on long values with the mask loaded from memory: emitted as
// pextq with a memory mask operand, folding the LoadL into the instruction.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src (LoadL mask)));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ pextq(result, input, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
12674 
// ExpandBits on long values with the mask loaded from memory: emitted as
// pdepq with a memory mask operand, folding the LoadL into the instruction.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src (LoadL mask)));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ pdepq(result, input, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
12684 
12685 
12686 // Logical Instructions
12687 
12688 // Integer Logical Instructions
12689 
12690 // And Instructions
// Bitwise AND of two int registers, in-place two-operand form (APX off).
// The flag() clause records how the instruction affects the condition
// flags; the flags register is declared killed.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register other  = $src$$Register;
    __ andl(target, other);
  %}
  ins_pipe(ialu_reg_reg);
%}
12705 
// Bitwise AND of two int registers into a separate destination, using the
// APX New Data Destination (NDD) form eandl. Both source operands are
// tagged as demotable in the flag() clause.
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ eandl(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12721 
// AND of an int register with the constant 255, emitted as a zero-extending
// byte move (movzbl). The rule declares no flags operand.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ movzbl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
12733 
// AND of an int register with 255 followed by int-to-long conversion,
// emitted as a single zero-extending byte move (movzbl) into a long register.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ movzbl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
12745 
// AND of an int register with the constant 65535, emitted as a
// zero-extending 16-bit move (movzwl). The rule declares no flags operand.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ movzwl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
12757 
// AND of an int register with 65535 followed by int-to-long conversion,
// emitted as a single zero-extending 16-bit move (movzwl) into a long
// register.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ movzwl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
12769 
// Can skip int2long conversions after AND with small bitmask
// Matches ConvI2L(AndI src mask) where mask is an immI_Pow2M1 operand and
// BMI2 is available. Two instructions are emitted even though the format
// string shows only bzhiq: the bit count is first loaded into the TEMP
// register, then bzhiq uses it as the index operand.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
  ins_encode %{
    // mask + 1 is passed to exact_log2 to obtain the number of low bits to keep.
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12784 
// Bitwise AND of an int register with an int constant, in-place two-operand
// form (APX off). The flags register is declared killed.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const int bits = $src$$constant;
    __ andl(target, bits);
  %}
  ins_pipe(ialu_reg);
%}
12799 
// Bitwise AND of an int register with an int constant into a separate
// destination, using the APX three-operand eandl form.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src1$$Register;
    const int bits = $src2$$constant;
    __ eandl(result, input, bits, false);
  %}
  ins_pipe(ialu_reg);
%}
12813 
// Bitwise AND of an int register with an int loaded from memory; the LoadI
// is folded into andl's memory operand. Costed at 150.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ andl(target, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12828 
// Bitwise AND of a byte in memory with a register, stored back to the same
// address: matches LoadB / AndI / StoreB and emits one read-modify-write
// andb. Costed at 150.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andb    $dst, $src\t# byte" %}
  ins_encode %{
    const Register other = $src$$Register;
    __ andb($dst$$Address, other);
  %}
  ins_pipe(ialu_mem_reg);
%}
12843 
// Bitwise AND of an int in memory with a register, stored back to the same
// address: matches LoadI / AndI / StoreI and emits one read-modify-write
// andl. Costed at 150.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register other = $src$$Register;
    __ andl($dst$$Address, other);
  %}
  ins_pipe(ialu_mem_reg);
%}
12857 
// Bitwise AND of an int in memory with an int constant, stored back to the
// same address: matches LoadI / AndI / StoreI and emits one
// read-modify-write andl. Costed at 125.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const int bits = $src$$constant;
    __ andl($dst$$Address, bits);
  %}
  ins_pipe(ialu_mem_imm);
%}
12872 
12873 // BMI1 instructions
// AND-NOT with a memory operand: matches (src1 ^ -1) & LoadI(src2) and emits
// a single andnl. Requires the BMI1 and AVX checks in the predicate.
// Costed at 125.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    const Register result   = $dst$$Register;
    const Register inverted = $src1$$Register;
    __ andnl(result, inverted, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12888 
// AND-NOT on registers: matches (src1 ^ -1) & src2 and emits a single andnl.
// Requires the BMI1 and AVX checks in the predicate.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    const Register result   = $dst$$Register;
    const Register inverted = $src1$$Register;
    const Register other    = $src2$$Register;
    __ andnl(result, inverted, other);
  %}
  ins_pipe(ialu_reg);
%}
12902 
// Matches (0 - src) & src on int registers and emits a single blsil.
// Requires the BMI1 and AVX checks in the predicate.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ blsil(result, input);
  %}
  ins_pipe(ialu_reg);
%}
12916 
// Matches (0 - LoadI(src)) & LoadI(src) and emits a single blsil with a
// memory source. Requires the BMI1 and AVX checks in the predicate.
// Costed at 125.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    const Register result = $dst$$Register;
    __ blsil(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12931 
// BMI1 BLSMSK, memory source, 32-bit: dst = ([src] + minus_1) ^ [src].
// minus_1 (immI_M1) only pins the AddI constant in the match tree.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src)));
  ins_cost(125);
  ins_pipe(ialu_reg_mem);

  // The flags register operand is declared killed; note the zero flag is
  // listed as cleared rather than set for this instruction.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
%}
12947 
// BMI1 BLSMSK, register source, 32-bit: dst = (src + minus_1) ^ src.
// minus_1 (immI_M1) only pins the AddI constant in the match tree.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI src minus_1) src));
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed; the zero flag is listed
  // as cleared rather than set for this instruction.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}
%}
12963 
// BMI1 BLSR, register source, 32-bit: dst = (src + minus_1) & src.
// minus_1 (immI_M1) only pins the AddI constant in the match tree; it is not
// passed to the assembler.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // No memory operand is involved, so use the register pipeline class, as the
  // other register-only BMI1 rules (blsiI_rReg_rReg, blsmskI_rReg_rReg) do.
  ins_pipe(ialu_reg);
%}
12979 
// BMI1 BLSR, memory source, 32-bit: dst = ([src] + minus_1) & [src].
// The match tree names the same memory operand in both LoadI positions;
// minus_1 (immI_M1) only pins the AddI constant.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // The source is a memory operand, so use the register-memory pipeline class,
  // as the other memory-source BMI1 rules (blsiI_rReg_mem, blsmskI_rReg_mem) do.
  ins_pipe(ialu_reg_mem);
%}
12996 
12997 // Or Instructions
// 32-bit OR, register with register, two-operand form: dst is both the first
// input and the result. Selected only when UseAPX is off.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
%}
13012 
// 32-bit OR, register with register, New Data Destination (NDD) form:
// three operands, with dst separate from src1 and src2. Selected only when
// UseAPX is on.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed. Both register sources
  // carry an ndd_demotable marker.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
%}
13027 
// 32-bit OR, register with immediate, two-operand form: dst is both the
// register input and the result. Selected only when UseAPX is off.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
%}
13042 
// 32-bit OR, register with immediate, NDD form: dst is separate from the
// register source src1; src2 is the immediate. Selected only when UseAPX is on.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed. Only operand 1, the
  // register source, carries an ndd_demotable marker.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
%}
13056 
// 32-bit OR with the immediate as the FIRST ideal input, NDD form. Selected
// only when UseAPX is on. The encoding swaps the operands: the register (src2)
// is passed as the first source of eorl and the immediate (src1) as the second.
//
// The ndd_demotable marker names operand 2 because that is the only register
// source of this rule; operand 1 is an immediate.
// NOTE(review): assumes Flag_ndd_demotable_oprN designates the N-th source
// operand, as the sibling *_ndd rules suggest (they mark only their register
// sources) -- confirm against the code that consumes the flag.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13070 
// 32-bit OR, register with a value loaded from memory; dst is both the
// register input and the result. There is no predicate clause on this rule.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  ins_cost(150);
  ins_pipe(ialu_reg_mem);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
%}
13085 
// Byte OR, memory with register: matches a StoreB of (LoadB dst | src) back
// to the same memory operand and emits a single orb on that address.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  ins_cost(150);
  ins_pipe(ialu_mem_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
%}
13100 
// 32-bit OR, memory with register: matches a StoreI of (LoadI dst | src) back
// to the same memory operand and emits a single orl on that address.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(150);
  ins_pipe(ialu_mem_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
%}
13114 
// 32-bit OR, memory with immediate: matches a StoreI of (LoadI dst | src) back
// to the same memory operand and emits a single orl with a constant.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(125);
  ins_pipe(ialu_mem_imm);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
%}
13129 
13130 // Xor Instructions
// 32-bit XOR, register with register, two-operand form: dst is both the first
// input and the result. Selected only when UseAPX is off.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
%}
13145 
// 32-bit XOR, register with register, New Data Destination (NDD) form:
// three operands, with dst separate from src1 and src2. Selected only when
// UseAPX is on.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed. Both register sources
  // carry an ndd_demotable marker.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
%}
13160 
// 32-bit XOR of a register with the immediate -1, emitted as notl on dst.
// Selected only when UseAPX is off. The rule declares no flags-register
// operand and no effect clause.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);
  ins_pipe(ialu_reg);

  format %{ "notl    $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
%}
13173 
// 32-bit XOR of a register with the immediate -1, NDD form: emitted as enotl
// with dst separate from src. Selected only when UseAPX is on. The rule
// declares no flags-register operand; its only flag is the ndd_demotable
// marker on operand 1.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm) %{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  ins_pipe(ialu_reg);

  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl    $dst, $src" %}
  ins_encode %{
    __ enotl($dst$$Register, $src$$Register);
  %}
%}
13186 
// 32-bit XOR, register with immediate, two-operand form: dst is both the
// register input and the result. Selected only when UseAPX is off.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  // The constant -1 is excluded here so that xorI_rReg_im1 is chosen for it
  // regardless of instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
%}
13202 
// 32-bit XOR, register with immediate, NDD form: dst is separate from the
// register source src1; src2 is the immediate. Selected only when UseAPX is on.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 src2));
  // The constant -1 is excluded here so that xorI_rReg_im1_ndd is chosen for
  // it regardless of instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed. Only operand 1, the
  // register source, carries an ndd_demotable marker.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
%}
13217 
// 32-bit XOR, register with a value loaded from memory; dst is both the
// register input and the result. There is no predicate clause on this rule.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  ins_cost(150);
  ins_pipe(ialu_reg_mem);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
%}
13232 
// Byte XOR, memory with register: matches a StoreB of (LoadB dst ^ src) back
// to the same memory operand and emits a single xorb on that address.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  ins_cost(150);
  ins_pipe(ialu_mem_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
%}
13247 
// 32-bit XOR, memory with register: matches a StoreI of (LoadI dst ^ src) back
// to the same memory operand and emits a single xorl on that address.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(150);
  ins_pipe(ialu_mem_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
%}
13261 
// 32-bit XOR, memory with immediate: matches a StoreI of (LoadI dst ^ src)
// back to the same memory operand and emits a single xorl with a constant.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(125);
  ins_pipe(ialu_mem_imm);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
%}
13276 
13277 
13278 // Long Logical Instructions
13279 
13280 // And Instructions
// 64-bit AND, register with register, two-operand form: dst is both the first
// input and the result. Selected only when UseAPX is off.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
%}
13295 
// 64-bit AND, register with register, New Data Destination (NDD) form:
// three operands, with dst separate from src1 and src2. Selected only when
// UseAPX is on.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed. Both register sources
  // carry an ndd_demotable marker.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
%}
13311 
// 64-bit AND of a register with the mask operand immL_255, emitted as a
// zero-extending byte move from src into dst. The rule has no predicate and
// declares no flags-register operand.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask) %{
  match(Set dst (AndL src mask));
  ins_pipe(ialu_reg);

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // The 32-bit form is enough: movzbl already zeroes the upper 32 bits,
    // so REX.W is not needed.
    __ movzbl($dst$$Register, $src$$Register);
  %}
%}
13324 
// 64-bit AND of a register with the mask operand immL_65535, emitted as a
// zero-extending 16-bit move from src into dst. The rule has no predicate and
// declares no flags-register operand.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask) %{
  match(Set dst (AndL src mask));
  ins_pipe(ialu_reg);

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // The 32-bit form is enough: movzwl already zeroes the upper 32 bits,
    // so REX.W is not needed.
    __ movzwl($dst$$Register, $src$$Register);
  %}
%}
13337 
// 64-bit AND, register with an immL32 immediate, two-operand form: dst is both
// the register input and the result. Selected only when UseAPX is off.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
%}
13352 
// 64-bit AND, register with an immL32 immediate, NDD form: dst is separate
// from the register source src1. Selected only when UseAPX is on.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed. Only operand 1, the
  // register source, carries an ndd_demotable marker.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
%}
13366 
// 64-bit AND, register with a value loaded from memory; dst is both the
// register input and the result. There is no predicate clause on this rule.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL src)));
  ins_cost(150);
  ins_pipe(ialu_reg_mem);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
%}
13381 
// 64-bit AND, memory with register: matches a StoreL of (LoadL dst & src) back
// to the same memory operand and emits a single andq on that address.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  ins_cost(150);
  ins_pipe(ialu_mem_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
%}
13396 
// 64-bit AND, memory with an immL32 immediate: matches a StoreL of
// (LoadL dst & src) back to the same memory operand and emits a single andq
// with a constant.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  ins_cost(125);
  ins_pipe(ialu_mem_imm);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
%}
13411 
// 64-bit bit reset in memory: matches a StoreL of (LoadL dst & con) back to
// the same memory operand, where con is an immL_NotPow2 constant, and emits
// btrq with the bit index log2(~con).
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));

  // Only take constants whose complement is a power of two with index above
  // 30, i.e. genuinely 64-bit immediates; plain AND/OR works well enough for
  // 8/32-bit values.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  ins_cost(125);
  ins_pipe(ialu_mem_imm);

  // The flags register operand is declared killed; no flag() summary is given.
  effect(KILL cr);

  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
%}
13428 
13429 // BMI1 instructions
// BMI1 and-not, memory second source, 64-bit: dst = (src1 ^ minus_1) & [src2].
// minus_1 (immL_M1) only pins the XorL constant in the match tree; the
// encoding does not use it.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  ins_cost(125);
  ins_pipe(ialu_reg_mem);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq  $dst, $src1, $src2" %}
  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
%}
13444 
// BMI1 and-not, all-register form, 64-bit: dst = (src1 ^ minus_1) & src2.
// minus_1 (immL_M1) only pins the XorL constant in the match tree; the
// encoding does not use it.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // No memory operand is involved, so use the register pipeline class, as the
  // 32-bit twin andnI_rReg_rReg_rReg does.
  ins_pipe(ialu_reg);
%}
13458 
// BMI1 BLSI, register source, 64-bit: dst = (imm_zero - src) & src.
// imm_zero (immL0) only anchors the SubL constant in the match tree and is
// not passed to the assembler.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero src) src));
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
%}
13472 
// BMI1 BLSI, memory source, 64-bit: dst = (imm_zero - [src]) & [src].
// The match tree names the same memory operand in both LoadL positions, and
// the instruction is emitted with that address as its source.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero (LoadL src)) (LoadL src)));
  ins_cost(125);
  ins_pipe(ialu_reg_mem);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
%}
13487 
// BMI1 BLSMSK, memory source, 64-bit: dst = ([src] + minus_1) ^ [src].
// minus_1 (immL_M1) only pins the AddL constant in the match tree.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src)));
  ins_cost(125);
  ins_pipe(ialu_reg_mem);

  // The flags register operand is declared killed; the zero flag is listed
  // as cleared rather than set for this instruction.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
%}
13503 
// BMI1 BLSMSK, register source, 64-bit: dst = (src + minus_1) ^ src.
// minus_1 (immL_M1) only pins the AddL constant in the match tree.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL src minus_1) src));
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed; the zero flag is listed
  // as cleared rather than set for this instruction.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}
%}
13519 
// BMI1 BLSR, register source, 64-bit: dst = (src + minus_1) & src.
// minus_1 (immL_M1) only pins the AddL constant in the match tree.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (AddL src minus_1) src));
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq  $dst, $src" %}
  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}
%}
13535 
// BMI1 BLSR, memory source, 64-bit: dst = ([src] + minus_1) & [src].
// The match tree names the same memory operand in both LoadL positions;
// minus_1 (immL_M1) only pins the AddL constant.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // The source is a memory operand, so use the register-memory pipeline class,
  // as the other memory-source BMI1 rules (blsiL_rReg_mem, blsmskL_rReg_mem) do.
  ins_pipe(ialu_reg_mem);
%}
13552 
13553 // Or Instructions
// 64-bit OR, register with register, two-operand form: dst is both the first
// input and the result. Selected only when UseAPX is off.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
%}
13568 
// 64-bit OR, register with register, New Data Destination (NDD) form:
// three operands, with dst separate from src1 and src2. Selected only when
// UseAPX is on.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed. Both register sources
  // carry an ndd_demotable marker.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
%}
13584 
// 64-bit OR of a long register with a pointer reinterpreted as a long
// (CastP2X), two-operand form. Selected only when UseAPX is off.
// The pointer operand is any_RegP so that R15 (the TLS register) can be
// matched without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  predicate(!UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
%}
13598 
// 64-bit OR of a long with a pointer reinterpreted as a long (CastP2X), NDD
// form: dst is separate from both sources. Selected only when UseAPX is on.
//
// src1 is the long-valued left input of OrL and is therefore an rRegL, the
// same class dst has in the legacy orL_rReg_castP2X rule. Only src2, the
// CastP2X input, is a pointer; it is any_RegP so that R15 (the TLS register)
// can be matched without spilling.
instruct orL_rReg_castP2X_ndd(rRegL dst, rRegL src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13611 
// 64-bit OR, register with an immL32 immediate, two-operand form: dst is both
// the register input and the result. Selected only when UseAPX is off.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$constant);
  %}
%}
13626 
// 64-bit OR, register with an immL32 immediate, NDD form: dst is separate
// from the register source src1. Selected only when UseAPX is on.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed. Only operand 1, the
  // register source, carries an ndd_demotable marker.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
%}
13640 
// 64-bit OR with the immL32 immediate as the FIRST ideal input, NDD form.
// Selected only when UseAPX is on. The encoding swaps the operands: the
// register (src2) is passed as the first source of eorq and the immediate
// (src1) as the second.
//
// The ndd_demotable marker names operand 2 because that is the only register
// source of this rule; operand 1 is an immediate.
// NOTE(review): assumes Flag_ndd_demotable_oprN designates the N-th source
// operand, as the sibling *_ndd rules suggest (they mark only their register
// sources) -- confirm against the code that consumes the flag.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13654 
// 64-bit OR, register with a value loaded from memory; dst is both the
// register input and the result. There is no predicate clause on this rule.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  ins_cost(150);
  ins_pipe(ialu_reg_mem);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Address);
  %}
%}
13669 
// 64-bit OR, memory with register: matches a StoreL of (LoadL dst | src) back
// to the same memory operand and emits a single orq on that address.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  ins_cost(150);
  ins_pipe(ialu_mem_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$Register);
  %}
%}
13684 
// 64-bit OR, memory with an immL32 immediate: matches a StoreL of
// (LoadL dst | src) back to the same memory operand and emits a single orq
// with a constant.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  ins_cost(125);
  ins_pipe(ialu_mem_imm);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
%}
13699 
// 64-bit bit set in memory: matches a StoreL of (LoadL dst | con) back to the
// same memory operand, where con is an immL_Pow2 constant, and emits btsq
// with the bit index log2(con).
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  // con should be a power of 2 that the OR-with-immediate rule cannot take,
  // because AND/OR works well enough for 8/32-bit values. The threshold is
  // bit 31, not bit 32: 1L << 31 does not survive a round trip through a
  // signed 32-bit int, so it is not a sign-extended 32-bit immediate either.
  // This is the same "> 30" bound btrL_mem_imm uses for the complemented mask.
  // NOTE(review): assumes immL32 (used by orL_mem_imm) accepts only
  // sign-extended 32-bit values -- confirm against its operand definition.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 30);

  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  // The flags register operand is declared killed; no flag() summary is given.
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
13716 
13717 // Xor Instructions
// 64-bit XOR, register with register, two-operand form: dst is both the first
// input and the result. Selected only when UseAPX is off.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  predicate(!UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Register);
  %}
%}
13732 
// 64-bit XOR, register with register, New Data Destination (NDD) form:
// three operands, with dst separate from src1 and src2. Selected only when
// UseAPX is on.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 src2));
  predicate(UseAPX);
  ins_pipe(ialu_reg_reg);

  // The flags register operand is declared killed. Both register sources
  // carry an ndd_demotable marker.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
%}
13747 
// 64-bit XOR of a register with the immediate -1, emitted as notq on dst.
// Selected only when UseAPX is off. The rule declares no flags-register
// operand and no effect clause.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  predicate(!UseAPX);
  ins_pipe(ialu_reg);

  format %{ "notq   $dst" %}
  ins_encode %{
    __ notq($dst$$Register);
  %}
%}
13760 
// 64-bit XOR of a register with the immediate -1, NDD form: emitted as enotq
// with dst separate from src. Selected only when UseAPX is on. The rule
// declares no flags-register operand; its only flag is the ndd_demotable
// marker on operand 1.
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm) %{
  match(Set dst (XorL src imm));
  predicate(UseAPX);
  ins_pipe(ialu_reg);

  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq   $dst, $src" %}
  ins_encode %{
    __ enotq($dst$$Register, $src$$Register);
  %}
%}
13773 
// 64-bit XOR, register with an immL32 immediate, two-operand form: dst is
// both the register input and the result. Selected only when UseAPX is off.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  // The constant -1 is excluded here so that xorL_rReg_im1 is chosen for it
  // regardless of instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  ins_pipe(ialu_reg);

  // The flags register operand is declared killed.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$constant);
  %}
%}
13789 
// Long XOR of a register with a 32-bit immediate into a separate destination
// register; eligible only when UseAPX is set.
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  // The constant -1 is excluded here so that xorL_rReg_im1_ndd is picked for
  // it regardless of instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src1$$Register;
    // NOTE(review): the trailing false looks like the assembler no-flags
    // selector - confirm against the assembler declaration.
    __ exorq(result, input, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13804 
// Long XOR of a register with a long loaded from memory; the load is folded
// into the xorq and the result stays in dst.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register target  = $dst$$Register;
    Address  operand = $src$$Address;
    __ xorq(target, operand);
  %}
  ins_pipe(ialu_reg_mem);
%}
13819 
// Read-modify-write long XOR: the value at dst is XORed with a register and
// stored back to the same memory operand by one xorq.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Address  slot  = $dst$$Address;
    Register value = $src$$Register;
    __ xorq(slot, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
13834 
// Read-modify-write long XOR of a memory operand with a 32-bit immediate.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Address slot = $dst$$Address;
    __ xorq(slot, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13849 
// CmpLTMask of two int registers into dst: compare p with q, set dst from the
// signed "less" condition, then negate it.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // One line per emitted step; the setcc line must end with a separator so
  // that negl is not appended to its trailing comment in the listing.
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setcc   $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl    $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13866 
// CmpLTMask against the constant zero, done in place with an arithmetic
// right shift by 31.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    Register value = $dst$$Register;
    __ sarl(value, 31);
  %}
  ins_pipe(ialu_reg);
%}
13879 
13880 /* Better to save a register than avoid a branch */
// Fused form of (CmpLTMask(p, q) & y) + (p - q), implemented with a short
// branch so that no extra register is needed: p -= q, and y is added back
// only when the subtraction result is signed-less.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);
  format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
            "jge     done\n\t"
            "addl    $p,$y\n"
            "done:   " %}
  ins_encode %{
    Label skip_add;
    __ subl($p$$Register, $q$$Register);
    // Short jump over the add when p >= q.
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl($p$$Register, $y$$Register);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
13902 
13903 /* Better to save a register than avoid a branch */
// Fused form of CmpLTMask(p, q) & y, implemented with a short branch so that
// no extra register is needed: y is kept when p < q and zeroed otherwise.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
            "jlt     done\n\t"
            "xorl    $y, $y\n"
            "done:   " %}
  ins_encode %{
    Label keep_y;
    __ cmpl($p$$Register, $q$$Register);
    // Short jump over the clearing xor when p < q.
    __ jccb(Assembler::less, keep_y);
    __ xorl($y$$Register, $y$$Register);
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
13927 
13928 
13929 //---------- FP Instructions------------------------------------------------
13930 
// Float compare into an rFlagsRegU operand. Costly (ins_cost 500), avoid:
// ucomiss is followed by the emit_cmpfp_fixup sequence, which the format
// text describes as a flags rewrite performed when a NaN was seen.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
13949 
// Float register-register compare into an rFlagsRegUCF operand: a bare
// ucomiss with no fixup sequence.
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13960 
// Float register-register compare into an rFlagsRegUCFE operand, emitted as
// evucomxss.
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ evucomxss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13971 
// Float compare of a register against a float loaded from memory, into an
// rFlagsRegUCF operand; the load is folded into ucomiss.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    Address     rhs = $src2$$Address;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13982 
// Float compare of a register against a float loaded from memory, into an
// rFlagsRegUCFE operand; the load is folded into evucomxss.
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    Address     rhs = $src2$$Address;
    __ evucomxss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13993 
// Float compare of a register against a float constant, into an
// rFlagsRegUCF operand; the constant is read from the constant table.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14004 
// Float compare of a register against a float constant, into an
// rFlagsRegUCFE operand; the constant is read from the constant table.
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14015 
// Double compare into an rFlagsRegU operand. Costly (ins_cost 500), avoid:
// ucomisd is followed by the emit_cmpfp_fixup sequence, which the format
// text describes as a flags rewrite performed when a NaN was seen.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14034 
// Double register-register compare into an rFlagsRegUCF operand: a bare
// ucomisd with no fixup sequence.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // Listing text mirrors the float variant; it names only the instruction
  // and its two operands.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14045 
// Double register-register compare into an rFlagsRegUCFE operand, emitted as
// evucomxsd.
instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // Listing text mirrors the float variant; it names only the instruction
  // and its two operands.
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14056 
// Double compare of a register against a double loaded from memory, into an
// rFlagsRegUCF operand; the load is folded into ucomisd.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    Address     rhs = $src2$$Address;
    __ ucomisd(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
14067 
// Double compare of a register against a double loaded from memory, into an
// rFlagsRegUCFE operand; the load is folded into evucomxsd.
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    Address     rhs = $src2$$Address;
    __ evucomxsd(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
14078 
// Double compare of a register against a double constant, into an
// rFlagsRegUCF operand; the constant is read from the constant table.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14088 
// Double compare of a register against a double constant, into an
// rFlagsRegUCFE operand; the constant is read from the constant table.
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxsd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14099 
// Three-way float compare (CmpF3) of two registers, producing -1, 0 or 1 in
// an int register. ucomiss sets the flags and emit_cmpfp3 turns them into
// the integer result; the format text spells out that sequence.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    Register    result = $dst$$Register;
    __ ucomiss(lhs, rhs);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14120 
// Three-way float compare (CmpF3) of a register against a float loaded from
// memory, producing -1, 0 or 1 in an int register.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs    = $src1$$XMMRegister;
    Address     rhs    = $src2$$Address;
    Register    result = $dst$$Register;
    __ ucomiss(lhs, rhs);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14141 
// Three-way float compare (CmpF3) of a register against a float constant
// taken from the constant table, producing -1, 0 or 1 in an int register.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs    = $src$$XMMRegister;
    Register    result = $dst$$Register;
    __ ucomiss(lhs, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14161 
// Three-way double compare (CmpD3) of two registers, producing -1, 0 or 1 in
// an int register. ucomisd sets the flags and emit_cmpfp3 turns them into
// the integer result; the format text spells out that sequence.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    Register    result = $dst$$Register;
    __ ucomisd(lhs, rhs);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14182 
// Three-way double compare (CmpD3) of a register against a double loaded
// from memory, producing -1, 0 or 1 in an int register.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs    = $src1$$XMMRegister;
    Address     rhs    = $src2$$Address;
    Register    result = $dst$$Register;
    __ ucomisd(lhs, rhs);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14203 
// Three-way double compare (CmpD3) of a register against a double constant
// taken from the constant table, producing -1, 0 or 1 in an int register.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs    = $src$$XMMRegister;
    Register    result = $dst$$Register;
    __ ucomisd(lhs, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14223 
14224 //----------Arithmetic Conversion Instructions---------------------------------
14225 
// Float to double conversion, register to register, via cvtss2sd.
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister wide   = $dst$$XMMRegister;
    XMMRegister narrow = $src$$XMMRegister;
    __ cvtss2sd(wide, narrow);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14236 
// Float to double conversion with the float loaded straight from memory.
// Offered only when UseAVX is 0.
instruct convF2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister wide   = $dst$$XMMRegister;
    Address     source = $src$$Address;
    __ cvtss2sd(wide, source);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14248 
// Double to float conversion, register to register, via cvtsd2ss.
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister narrow = $dst$$XMMRegister;
    XMMRegister wide   = $src$$XMMRegister;
    __ cvtsd2ss(narrow, wide);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14259 
// Double to float conversion with the double loaded straight from memory.
// Offered only when UseAVX is 0.
instruct convD2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister narrow = $dst$$XMMRegister;
    Address     source = $src$$Address;
    __ cvtsd2ss(narrow, source);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14271 
// XXX memory-operand variants are still to be added.
// Float to int conversion for CPUs without AVX10.2 support; the work is
// delegated to the convertF2I macro-assembler routine and flags are killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, result, input);
  %}
  ins_pipe(pipe_slow);
%}
14284 
// Float to int conversion when AVX10.2 is supported: a single evcvttss2sisl,
// with no flags effect declared.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ evcvttss2sisl(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14295 
// Float to int conversion when AVX10.2 is supported, with the float operand
// read directly from memory.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  input  = $src$$Address;
    __ evcvttss2sisl(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14306 
// Float to long conversion for CPUs without AVX10.2 support; delegated to
// the convertF2I macro-assembler routine with T_LONG as the target type.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, result, input);
  %}
  ins_pipe(pipe_slow);
%}
14318 
// Float to long conversion when AVX10.2 is supported: a single
// evcvttss2sisq, with no flags effect declared.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ evcvttss2sisq(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14329 
// Float to long conversion when AVX10.2 is supported, with the float operand
// read directly from memory.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  input  = $src$$Address;
    __ evcvttss2sisq(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14340 
// Double to int conversion for CPUs without AVX10.2 support; delegated to
// the convertF2I macro-assembler routine with T_DOUBLE as the source type.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, result, input);
  %}
  ins_pipe(pipe_slow);
%}
14352 
// Double to int conversion when AVX10.2 is supported: a single
// evcvttsd2sisl, with no flags effect declared.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ evcvttsd2sisl(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14363 
// Double to int conversion when AVX10.2 is supported, with the double
// operand read directly from memory.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  input  = $src$$Address;
    __ evcvttsd2sisl(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14374 
// Double to long conversion for CPUs without AVX10.2 support; delegated to
// the convertF2I macro-assembler routine with T_LONG / T_DOUBLE types.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, result, input);
  %}
  ins_pipe(pipe_slow);
%}
14386 
// Double to long conversion when AVX10.2 is supported: a single
// evcvttsd2sisq, with no flags effect declared.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ evcvttsd2sisq(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14397 
// Double to long conversion when AVX10.2 is supported, with the double
// operand read directly from memory.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  input  = $src$$Address;
    __ evcvttsd2sisq(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14408 
// RoundD of a double register into a long register, delegated to the
// round_double macro-assembler routine. dst, rtmp and rcx are all reserved
// as temporaries and the flags are killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    Register    result   = $dst$$Register;
    XMMRegister input    = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    scratch2 = $rcx$$Register;
    __ round_double(result, input, scratch, scratch2);
  %}
  ins_pipe(pipe_slow);
%}
14419 
// RoundF of a float register into an int register, delegated to the
// round_float macro-assembler routine. dst, rtmp and rcx are all reserved
// as temporaries and the flags are killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    Register    result   = $dst$$Register;
    XMMRegister input    = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    scratch2 = $rcx$$Register;
    __ round_float(result, input, scratch, scratch2);
  %}
  ins_pipe(pipe_slow);
%}
14430 
// Int to float conversion from a general register via cvtsi2ssl; used when
// UseXmmI2F is off.
instruct convI2F_reg_reg(vlRegF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    // With AVX enabled the destination is zeroed first.
    // NOTE(review): presumably to drop the dependency on its old contents - confirm.
    if (UseAVX > 0) {
      __ pxor(result, result);
    }
    __ cvtsi2ssl(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14445 
// Int to float conversion with the int loaded straight from memory.
// Offered only when UseAVX is 0.
instruct convI2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Address     input  = $src$$Address;
    __ cvtsi2ssl(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14457 
// Int to double conversion from a general register via cvtsi2sdl; used when
// UseXmmI2D is off.
instruct convI2D_reg_reg(vlRegD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    // With AVX enabled the destination is zeroed first.
    // NOTE(review): presumably to drop the dependency on its old contents - confirm.
    if (UseAVX > 0) {
      __ pxor(result, result);
    }
    __ cvtsi2sdl(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14472 
// Int to double conversion with the int loaded straight from memory.
// Offered only when UseAVX is 0.
instruct convI2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Address     input  = $src$$Address;
    __ cvtsi2sdl(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14484 
// Int to float conversion used when UseXmmI2F is on: the int is first moved
// into the XMM destination with movdl and then converted in place with
// cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ movdl(result, input);
    __ cvtdq2ps(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14498 
// Int to double conversion used when UseXmmI2D is on: the int is first moved
// into the XMM destination with movdl and then converted in place with
// cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ movdl(result, input);
    __ cvtdq2pd(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14512 
// Long to float conversion from a general register via cvtsi2ssq.
instruct convL2F_reg_reg(vlRegF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    // With AVX enabled the destination is zeroed first.
    // NOTE(review): presumably to drop the dependency on its old contents - confirm.
    if (UseAVX > 0) {
      __ pxor(result, result);
    }
    __ cvtsi2ssq(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14526 
// Long to float conversion with the long loaded straight from memory.
// Offered only when UseAVX is 0.
instruct convL2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Address     input  = $src$$Address;
    __ cvtsi2ssq(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14538 
// Long to double conversion from a general register via cvtsi2sdq.
instruct convL2D_reg_reg(vlRegD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    // With AVX enabled the destination is zeroed first.
    // NOTE(review): presumably to drop the dependency on its old contents - confirm.
    if (UseAVX > 0) {
      __ pxor(result, result);
    }
    __ cvtsi2sdq(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14552 
// Long to double conversion with the long loaded straight from memory.
// Offered only when UseAVX is 0.
instruct convL2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Address     input  = $src$$Address;
    __ cvtsi2sdq(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14564 
// Int to long conversion via movslq (sign-extending move).
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    Register wide   = $dst$$Register;
    Register narrow = $src$$Register;
    __ movslq(wide, narrow);
  %}
  ins_pipe(ialu_reg_reg);
%}
14576 
// Zero-extending int to long conversion: ConvI2L followed by an AndL with
// the immL_32bits mask is collapsed into a single 32-bit movl.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  // Single-instruction listing, so no trailing line separator is emitted.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // The move is skipped when source and destination were assigned the
    // same register.
    // NOTE(review): this relies on the int value already having clear upper
    // bits in that case - confirm.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
14590 
// Zero-extending int to long conversion of an int loaded from memory: the
// LoadI, ConvI2L and AndL with the immL_32bits mask collapse into one movl.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  // Single-instruction listing, so no trailing line separator is emitted.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14602 
// AndL of a long register with the immL_32bits mask, implemented as a 32-bit
// movl (listed as "zero-extend long"). The move is always emitted.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    __ movl(result, input);
  %}
  ins_pipe(ialu_reg_reg);
%}
14613 
// Long to int conversion, implemented as a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    Register narrow = $dst$$Register;
    Register wide   = $src$$Register;
    __ movl(narrow, wide);
  %}
  ins_pipe(ialu_reg_reg);
%}
14624 
14625 
// MoveF2I where the float lives in a stack slot: its 32 bits are loaded from
// the rsp-relative slot into an int register with movl.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
14637 
// MoveI2F where the int lives in a stack slot: loaded from the rsp-relative
// slot into a float register with movflt.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14649 
// MoveD2L where the double lives in a stack slot: its 64 bits are loaded
// from the rsp-relative slot into a long register with movq.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
14661 
// MoveL2D from a stack slot into a double register, variant selected when
// UseXmmLoadAndClearUpper is off (listed as movlpd). The load itself goes
// through the movdbl macro-assembler routine.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14674 
// MoveL2D from a stack slot into a double register, variant selected when
// UseXmmLoadAndClearUpper is on (listed as movsd). The load itself goes
// through the movdbl macro-assembler routine.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14687 
14688 
// MoveF2I where the result is wanted in a stack slot: the float register is
// stored to the rsp-relative int slot with movflt.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14700 
// MoveI2F where the result is wanted in a stack slot: the int register is
// stored to the rsp-relative float slot with movl.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14712 
// MoveD2L where the result is wanted in a stack slot: the double register is
// stored to the rsp-relative long slot with movdbl.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The listing tag names this rule (MoveD2L), not its MoveL2D counterpart.
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14724 
// MoveL2D where the result is wanted in a stack slot: the long register is
// stored to the rsp-relative double slot with movq.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14736 
// MoveF2I directly between registers: float register to int register via
// movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ movdl(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14747 
// MoveD2L directly between registers: double register to long register via
// movdq.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ movdq(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14758 
// MoveI2F directly between registers: int register to float register via
// movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ movdl(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14769 
// MoveL2D directly between registers: long register to double register via
// movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ movdq(result, input);
  %}
  ins_pipe(pipe_slow);
%}
14780 
14781 
14782 // Fast clearing of an array
14783 // Small non-constant lenght ClearArray for non-AVX512 targets.
// ClearArray rule selected when the node is neither is_large() nor
// word_copy_only() and UseAVX <= 2.
// Operands are pinned: cnt in rcx, base in rdi, val in rax. All three are
// USE_KILL (consumed and clobbered), tmp is an XMM temporary, and the flags
// register is killed.
// The encoder defers entirely to clear_mem(). Its two trailing booleans
// (false, false) line up with the is_large()/word_copy_only() state tested by
// the predicate -- presumably that is what they denote; confirm against the
// clear_mem declaration.
// The format template picks its text with UseFastStosb / UseXMMForObjInit.
14784 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
14785                   Universe dummy, rFlagsReg cr)
14786 %{
14787   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
14788   match(Set dummy (ClearArray (Binary cnt base) val));
14789   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
14790 
14791   format %{ $$template
14792     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14793     $$emit$$"jg      LARGE\n\t"
14794     $$emit$$"dec     rcx\n\t"
14795     $$emit$$"js      DONE\t# Zero length\n\t"
14796     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14797     $$emit$$"dec     rcx\n\t"
14798     $$emit$$"jge     LOOP\n\t"
14799     $$emit$$"jmp     DONE\n\t"
14800     $$emit$$"# LARGE:\n\t"
14801     if (UseFastStosb) {
14802        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14803        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14804     } else if (UseXMMForObjInit) {
14805        $$emit$$"movdq   $tmp, $val\n\t"
14806        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
14807        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
14808        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14809        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14810        $$emit$$"vmovdqu $tmp,(rax)\n\t"
14811        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
14812        $$emit$$"add     0x40,rax\n\t"
14813        $$emit$$"# L_zero_64_bytes:\n\t"
14814        $$emit$$"sub     0x8,rcx\n\t"
14815        $$emit$$"jge     L_loop\n\t"
14816        $$emit$$"add     0x4,rcx\n\t"
14817        $$emit$$"jl      L_tail\n\t"
14818        $$emit$$"vmovdqu $tmp,(rax)\n\t"
14819        $$emit$$"add     0x20,rax\n\t"
14820        $$emit$$"sub     0x4,rcx\n\t"
14821        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14822        $$emit$$"add     0x4,rcx\n\t"
14823        $$emit$$"jle     L_end\n\t"
14824        $$emit$$"dec     rcx\n\t"
14825        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14826        $$emit$$"vmovq   xmm0,(rax)\n\t"
14827        $$emit$$"add     0x8,rax\n\t"
14828        $$emit$$"dec     rcx\n\t"
14829        $$emit$$"jge     L_sloop\n\t"
14830        $$emit$$"# L_end:\n\t"
14831     } else {
14832        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14833     }
14834     $$emit$$"# DONE"
14835   %}
14836   ins_encode %{
14837     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14838                  $tmp$$XMMRegister, false, false);
14839   %}
14840   ins_pipe(pipe_slow);
14841 %}
14842 
// ClearArray rule selected when the node is not is_large(), is
// word_copy_only(), and UseAVX <= 2.
// Operand pinning and effects match rep_stos: cnt in rcx, base in rdi and val
// in rax are USE_KILL, tmp is an XMM temporary, and flags are killed.
// The encoder calls clear_mem() with trailing booleans (false, true), which
// line up with the predicate's is_large()/word_copy_only() state --
// presumably their meaning; confirm against the clear_mem declaration.
// Unlike rep_stos, the format template has no UseFastStosb branch.
14843 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
14844                             Universe dummy, rFlagsReg cr)
14845 %{
14846   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
14847   match(Set dummy (ClearArray (Binary cnt base) val));
14848   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
14849 
14850   format %{ $$template
14851     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14852     $$emit$$"jg      LARGE\n\t"
14853     $$emit$$"dec     rcx\n\t"
14854     $$emit$$"js      DONE\t# Zero length\n\t"
14855     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14856     $$emit$$"dec     rcx\n\t"
14857     $$emit$$"jge     LOOP\n\t"
14858     $$emit$$"jmp     DONE\n\t"
14859     $$emit$$"# LARGE:\n\t"
14860     if (UseXMMForObjInit) {
14861        $$emit$$"movdq   $tmp, $val\n\t"
14862        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
14863        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
14864        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14865        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14866        $$emit$$"vmovdqu $tmp,(rax)\n\t"
14867        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
14868        $$emit$$"add     0x40,rax\n\t"
14869        $$emit$$"# L_zero_64_bytes:\n\t"
14870        $$emit$$"sub     0x8,rcx\n\t"
14871        $$emit$$"jge     L_loop\n\t"
14872        $$emit$$"add     0x4,rcx\n\t"
14873        $$emit$$"jl      L_tail\n\t"
14874        $$emit$$"vmovdqu $tmp,(rax)\n\t"
14875        $$emit$$"add     0x20,rax\n\t"
14876        $$emit$$"sub     0x4,rcx\n\t"
14877        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14878        $$emit$$"add     0x4,rcx\n\t"
14879        $$emit$$"jle     L_end\n\t"
14880        $$emit$$"dec     rcx\n\t"
14881        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14882        $$emit$$"vmovq   xmm0,(rax)\n\t"
14883        $$emit$$"add     0x8,rax\n\t"
14884        $$emit$$"dec     rcx\n\t"
14885        $$emit$$"jge     L_sloop\n\t"
14886        $$emit$$"# L_end:\n\t"
14887     } else {
14888        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14889     }
14890     $$emit$$"# DONE"
14891   %}
14892   ins_encode %{
14893     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14894                  $tmp$$XMMRegister, false, true);
14895   %}
14896   ins_pipe(pipe_slow);
14897 %}
14898 
14899 // Small non-constant length ClearArray for AVX512 targets.
// ClearArray rule selected when the node is neither is_large() nor
// word_copy_only() and UseAVX > 2; costed at 125.
// cnt (rcx), base (rdi) and val (rax) are USE_KILL; tmp (legRegD) and the
// mask register ktmp are temporaries; flags are killed.
// The encoder calls clear_mem() with trailing booleans (false, false) plus
// $ktmp. The booleans line up with the predicate's
// is_large()/word_copy_only() state -- presumably their meaning; confirm
// against the clear_mem declaration.
// NOTE(review): the format text starts with "xorq rax, rax" and its XMM path
// prints ymm0/vpxor, although $val is an input pinned to rax and $tmp is the
// declared temporary. The listing text may not track the encoder -- confirm.
14900 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
14901                        Universe dummy, rFlagsReg cr)
14902 %{
14903   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
14904   match(Set dummy (ClearArray (Binary cnt base) val));
14905   ins_cost(125);
14906   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
14907 
14908   format %{ $$template
14909     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14910     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14911     $$emit$$"jg      LARGE\n\t"
14912     $$emit$$"dec     rcx\n\t"
14913     $$emit$$"js      DONE\t# Zero length\n\t"
14914     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14915     $$emit$$"dec     rcx\n\t"
14916     $$emit$$"jge     LOOP\n\t"
14917     $$emit$$"jmp     DONE\n\t"
14918     $$emit$$"# LARGE:\n\t"
14919     if (UseFastStosb) {
14920        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14921        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14922     } else if (UseXMMForObjInit) {
14923        $$emit$$"mov     rdi,rax\n\t"
14924        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14925        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14926        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14927        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14928        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14929        $$emit$$"add     0x40,rax\n\t"
14930        $$emit$$"# L_zero_64_bytes:\n\t"
14931        $$emit$$"sub     0x8,rcx\n\t"
14932        $$emit$$"jge     L_loop\n\t"
14933        $$emit$$"add     0x4,rcx\n\t"
14934        $$emit$$"jl      L_tail\n\t"
14935        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14936        $$emit$$"add     0x20,rax\n\t"
14937        $$emit$$"sub     0x4,rcx\n\t"
14938        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14939        $$emit$$"add     0x4,rcx\n\t"
14940        $$emit$$"jle     L_end\n\t"
14941        $$emit$$"dec     rcx\n\t"
14942        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14943        $$emit$$"vmovq   xmm0,(rax)\n\t"
14944        $$emit$$"add     0x8,rax\n\t"
14945        $$emit$$"dec     rcx\n\t"
14946        $$emit$$"jge     L_sloop\n\t"
14947        $$emit$$"# L_end:\n\t"
14948     } else {
14949        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14950     }
14951     $$emit$$"# DONE"
14952   %}
14953   ins_encode %{
14954     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
14955                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
14956   %}
14957   ins_pipe(pipe_slow);
14958 %}
14959 
// ClearArray rule selected when the node is not is_large(), is
// word_copy_only(), and UseAVX > 2; costed at 125.
// cnt (rcx), base (rdi) and val (rax) are USE_KILL; tmp (legRegD) and the
// mask register ktmp are temporaries; flags are killed.
// The encoder calls clear_mem() with trailing booleans (false, true) plus
// $ktmp. The booleans line up with the predicate's
// is_large()/word_copy_only() state -- presumably their meaning; confirm
// against the clear_mem declaration.
// NOTE(review): the format text starts with "xorq rax, rax" although $val is
// an input pinned to rax, and it keeps a UseFastStosb branch that the
// non-EVEX rep_stos_word_copy template does not have. The listing text may
// not track the encoder -- confirm.
14960 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
14961                                  Universe dummy, rFlagsReg cr)
14962 %{
14963   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
14964   match(Set dummy (ClearArray (Binary cnt base) val));
14965   ins_cost(125);
14966   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
14967 
14968   format %{ $$template
14969     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14970     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14971     $$emit$$"jg      LARGE\n\t"
14972     $$emit$$"dec     rcx\n\t"
14973     $$emit$$"js      DONE\t# Zero length\n\t"
14974     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14975     $$emit$$"dec     rcx\n\t"
14976     $$emit$$"jge     LOOP\n\t"
14977     $$emit$$"jmp     DONE\n\t"
14978     $$emit$$"# LARGE:\n\t"
14979     if (UseFastStosb) {
14980        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14981        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14982     } else if (UseXMMForObjInit) {
14983        $$emit$$"mov     rdi,rax\n\t"
14984        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14985        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14986        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14987        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14988        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14989        $$emit$$"add     0x40,rax\n\t"
14990        $$emit$$"# L_zero_64_bytes:\n\t"
14991        $$emit$$"sub     0x8,rcx\n\t"
14992        $$emit$$"jge     L_loop\n\t"
14993        $$emit$$"add     0x4,rcx\n\t"
14994        $$emit$$"jl      L_tail\n\t"
14995        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14996        $$emit$$"add     0x20,rax\n\t"
14997        $$emit$$"sub     0x4,rcx\n\t"
14998        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14999        $$emit$$"add     0x4,rcx\n\t"
15000        $$emit$$"jle     L_end\n\t"
15001        $$emit$$"dec     rcx\n\t"
15002        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15003        $$emit$$"vmovq   xmm0,(rax)\n\t"
15004        $$emit$$"add     0x8,rax\n\t"
15005        $$emit$$"dec     rcx\n\t"
15006        $$emit$$"jge     L_sloop\n\t"
15007        $$emit$$"# L_end:\n\t"
15008     } else {
15009        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15010     }
15011     $$emit$$"# DONE"
15012   %}
15013   ins_encode %{
15014     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15015                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15016   %}
15017   ins_pipe(pipe_slow);
15018 %}
15019 
15020 // Large non-constant length ClearArray for non-AVX512 targets.
// ClearArray rule selected when the node is is_large(), not
// word_copy_only(), and UseAVX <= 2.
// cnt (rcx), base (rdi) and val (rax) are USE_KILL; tmp is an XMM temporary;
// flags are killed.
// The encoder calls clear_mem() with trailing booleans (true, false), which
// line up with the predicate's is_large()/word_copy_only() state --
// presumably their meaning; confirm against the clear_mem declaration.
// The format template omits the short-length prologue printed by the
// non-large rules and picks its text with UseFastStosb / UseXMMForObjInit.
15021 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15022                         Universe dummy, rFlagsReg cr)
15023 %{
15024   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15025   match(Set dummy (ClearArray (Binary cnt base) val));
15026   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15027 
15028   format %{ $$template
15029     if (UseFastStosb) {
15030        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15031        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15032     } else if (UseXMMForObjInit) {
15033        $$emit$$"movdq   $tmp, $val\n\t"
15034        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15035        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15036        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15037        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15038        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15039        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15040        $$emit$$"add     0x40,rax\n\t"
15041        $$emit$$"# L_zero_64_bytes:\n\t"
15042        $$emit$$"sub     0x8,rcx\n\t"
15043        $$emit$$"jge     L_loop\n\t"
15044        $$emit$$"add     0x4,rcx\n\t"
15045        $$emit$$"jl      L_tail\n\t"
15046        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15047        $$emit$$"add     0x20,rax\n\t"
15048        $$emit$$"sub     0x4,rcx\n\t"
15049        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15050        $$emit$$"add     0x4,rcx\n\t"
15051        $$emit$$"jle     L_end\n\t"
15052        $$emit$$"dec     rcx\n\t"
15053        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15054        $$emit$$"vmovq   xmm0,(rax)\n\t"
15055        $$emit$$"add     0x8,rax\n\t"
15056        $$emit$$"dec     rcx\n\t"
15057        $$emit$$"jge     L_sloop\n\t"
15058        $$emit$$"# L_end:\n\t"
15059     } else {
15060        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15061     }
15062   %}
15063   ins_encode %{
15064     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15065                  $tmp$$XMMRegister, true, false);
15066   %}
15067   ins_pipe(pipe_slow);
15068 %}
15069 
// ClearArray rule selected when the node is both is_large() and
// word_copy_only(), and UseAVX <= 2.
// cnt (rcx), base (rdi) and val (rax) are USE_KILL; tmp is an XMM temporary;
// flags are killed.
// The encoder calls clear_mem() with trailing booleans (true, true), which
// line up with the predicate's is_large()/word_copy_only() state --
// presumably their meaning; confirm against the clear_mem declaration.
// The format template has no UseFastStosb branch.
15070 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15071                                   Universe dummy, rFlagsReg cr)
15072 %{
15073   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15074   match(Set dummy (ClearArray (Binary cnt base) val));
15075   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15076 
15077   format %{ $$template
15078     if (UseXMMForObjInit) {
15079        $$emit$$"movdq   $tmp, $val\n\t"
15080        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15081        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15082        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15083        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15084        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15085        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15086        $$emit$$"add     0x40,rax\n\t"
15087        $$emit$$"# L_zero_64_bytes:\n\t"
15088        $$emit$$"sub     0x8,rcx\n\t"
15089        $$emit$$"jge     L_loop\n\t"
15090        $$emit$$"add     0x4,rcx\n\t"
15091        $$emit$$"jl      L_tail\n\t"
15092        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15093        $$emit$$"add     0x20,rax\n\t"
15094        $$emit$$"sub     0x4,rcx\n\t"
15095        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15096        $$emit$$"add     0x4,rcx\n\t"
15097        $$emit$$"jle     L_end\n\t"
15098        $$emit$$"dec     rcx\n\t"
15099        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15100        $$emit$$"vmovq   xmm0,(rax)\n\t"
15101        $$emit$$"add     0x8,rax\n\t"
15102        $$emit$$"dec     rcx\n\t"
15103        $$emit$$"jge     L_sloop\n\t"
15104        $$emit$$"# L_end:\n\t"
15105     } else {
15106        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15107     }
15108   %}
15109   ins_encode %{
15110     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15111                  $tmp$$XMMRegister, true, true);
15112   %}
15113   ins_pipe(pipe_slow);
15114 %}
15115 
15116 // Large non-constant length ClearArray for AVX512 targets.
// ClearArray rule selected when the node is is_large(), not
// word_copy_only(), and UseAVX > 2.
// cnt (rcx), base (rdi) and val (rax) are USE_KILL; tmp (legRegD) and the
// mask register ktmp are temporaries; flags are killed.
// The encoder calls clear_mem() with trailing booleans (true, false) plus
// $ktmp. The booleans line up with the predicate's
// is_large()/word_copy_only() state -- presumably their meaning; confirm
// against the clear_mem declaration.
// NOTE(review): two of the format branches print "xorq rax, rax" although
// $val is an input pinned to rax. The listing text may not track the
// encoder -- confirm.
15117 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15118                              Universe dummy, rFlagsReg cr)
15119 %{
15120   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15121   match(Set dummy (ClearArray (Binary cnt base) val));
15122   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15123 
15124   format %{ $$template
15125     if (UseFastStosb) {
15126        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15127        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15128        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15129     } else if (UseXMMForObjInit) {
15130        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15131        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15132        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15133        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15134        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15135        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15136        $$emit$$"add     0x40,rax\n\t"
15137        $$emit$$"# L_zero_64_bytes:\n\t"
15138        $$emit$$"sub     0x8,rcx\n\t"
15139        $$emit$$"jge     L_loop\n\t"
15140        $$emit$$"add     0x4,rcx\n\t"
15141        $$emit$$"jl      L_tail\n\t"
15142        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15143        $$emit$$"add     0x20,rax\n\t"
15144        $$emit$$"sub     0x4,rcx\n\t"
15145        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15146        $$emit$$"add     0x4,rcx\n\t"
15147        $$emit$$"jle     L_end\n\t"
15148        $$emit$$"dec     rcx\n\t"
15149        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15150        $$emit$$"vmovq   xmm0,(rax)\n\t"
15151        $$emit$$"add     0x8,rax\n\t"
15152        $$emit$$"dec     rcx\n\t"
15153        $$emit$$"jge     L_sloop\n\t"
15154        $$emit$$"# L_end:\n\t"
15155     } else {
15156        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15157        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15158     }
15159   %}
15160   ins_encode %{
15161     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15162                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15163   %}
15164   ins_pipe(pipe_slow);
15165 %}
15166 
// ClearArray rule selected when the node is both is_large() and
// word_copy_only(), and UseAVX > 2.
// cnt (rcx), base (rdi) and val (rax) are USE_KILL; tmp (legRegD) and the
// mask register ktmp are temporaries; flags are killed.
// The encoder calls clear_mem() with trailing booleans (true, true) plus
// $ktmp. The booleans line up with the predicate's
// is_large()/word_copy_only() state -- presumably their meaning; confirm
// against the clear_mem declaration.
// NOTE(review): two of the format branches print "xorq rax, rax" although
// $val is an input pinned to rax, and the template keeps a UseFastStosb
// branch that the non-EVEX rep_stos_large_word_copy template does not have.
// The listing text may not track the encoder -- confirm.
15167 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15168                                        Universe dummy, rFlagsReg cr)
15169 %{
15170   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15171   match(Set dummy (ClearArray (Binary cnt base) val));
15172   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15173 
15174   format %{ $$template
15175     if (UseFastStosb) {
15176        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15177        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15178        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15179     } else if (UseXMMForObjInit) {
15180        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15181        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15182        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15183        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15184        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15185        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15186        $$emit$$"add     0x40,rax\n\t"
15187        $$emit$$"# L_zero_64_bytes:\n\t"
15188        $$emit$$"sub     0x8,rcx\n\t"
15189        $$emit$$"jge     L_loop\n\t"
15190        $$emit$$"add     0x4,rcx\n\t"
15191        $$emit$$"jl      L_tail\n\t"
15192        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15193        $$emit$$"add     0x20,rax\n\t"
15194        $$emit$$"sub     0x4,rcx\n\t"
15195        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15196        $$emit$$"add     0x4,rcx\n\t"
15197        $$emit$$"jle     L_end\n\t"
15198        $$emit$$"dec     rcx\n\t"
15199        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15200        $$emit$$"vmovq   xmm0,(rax)\n\t"
15201        $$emit$$"add     0x8,rax\n\t"
15202        $$emit$$"dec     rcx\n\t"
15203        $$emit$$"jge     L_sloop\n\t"
15204        $$emit$$"# L_end:\n\t"
15205     } else {
15206        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15207        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15208     }
15209   %}
15210   ins_encode %{
15211     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15212                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15213   %}
15214   ins_pipe(pipe_slow);
15215 %}
15216 
15217 // Small constant length ClearArray for AVX512 targets.
// ClearArray rule for a compile-time constant count (immL cnt). Selected when
// the node is neither is_large() nor word_copy_only(), MaxVectorSize >= 32,
// and VM_Version::supports_avx512vl() holds; costed at 100.
// base may be any pointer register and is not listed in the effect clause;
// val (rax) is USE_KILL; tmp (XMM) and ktmp (mask register) are temporaries;
// flags are killed.
// The encoder passes the count as $cnt$$constant to clear_mem().
15218 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15219 %{
15220   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15221             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15222   match(Set dummy (ClearArray (Binary cnt base) val));
15223   ins_cost(100);
15224   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15225   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15226   ins_encode %{
15227     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15228   %}
15229   ins_pipe(pipe_slow);
15230 %}
15231 
// StrComp rule for the LL encoding, selected when
// VM_Version::supports_avx512vlbw() is false.
// str1/cnt1 are pinned to rdi/rcx, str2/cnt2 to rsi/rdx, and the result to
// rax. All four inputs are USE_KILL, tmp1 is an XMM temporary, and flags are
// killed.
// knoreg is passed in the argument position where the _evex variant passes
// its mask register.
15232 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15233                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15234 %{
15235   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15236   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15237   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15238 
15239   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15240   ins_encode %{
15241     __ string_compare($str1$$Register, $str2$$Register,
15242                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15243                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15244   %}
15245   ins_pipe( pipe_slow );
15246 %}
15247 
// StrComp rule for the LL encoding, selected when
// VM_Version::supports_avx512vlbw() is true.
// Same operand pinning as string_compareL (rdi/rcx, rsi/rdx, result in rax),
// plus a mask-register temporary ktmp that is passed as the last argument to
// string_compare().
15248 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15249                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15250 %{
15251   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15252   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15253   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15254 
15255   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15256   ins_encode %{
15257     __ string_compare($str1$$Register, $str2$$Register,
15258                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15259                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15260   %}
15261   ins_pipe( pipe_slow );
15262 %}
15263 
// StrComp rule for the UU encoding, selected when
// VM_Version::supports_avx512vlbw() is false.
// str1/cnt1 are pinned to rdi/rcx, str2/cnt2 to rsi/rdx, and the result to
// rax. All four inputs are USE_KILL, tmp1 is an XMM temporary, and flags are
// killed. knoreg is passed in place of a mask register.
15264 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15265                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15266 %{
15267   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15268   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15269   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15270 
15271   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15272   ins_encode %{
15273     __ string_compare($str1$$Register, $str2$$Register,
15274                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15275                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15276   %}
15277   ins_pipe( pipe_slow );
15278 %}
15279 
// StrComp rule for the UU encoding, selected when
// VM_Version::supports_avx512vlbw() is true.
// Same operand pinning as string_compareU, plus a mask-register temporary
// ktmp that is passed as the last argument to string_compare().
15280 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15281                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15282 %{
15283   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15284   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15285   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15286 
15287   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15288   ins_encode %{
15289     __ string_compare($str1$$Register, $str2$$Register,
15290                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15291                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15292   %}
15293   ins_pipe( pipe_slow );
15294 %}
15295 
// StrComp rule for the LU encoding, selected when
// VM_Version::supports_avx512vlbw() is false.
// str1/cnt1 are pinned to rdi/rcx, str2/cnt2 to rsi/rdx, and the result to
// rax. All four inputs are USE_KILL, tmp1 is an XMM temporary, and flags are
// killed. knoreg is passed in place of a mask register.
15296 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15297                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15298 %{
15299   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15300   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15301   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15302 
15303   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15304   ins_encode %{
15305     __ string_compare($str1$$Register, $str2$$Register,
15306                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15307                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15308   %}
15309   ins_pipe( pipe_slow );
15310 %}
15311 
// StrComp rule for the LU encoding, selected when
// VM_Version::supports_avx512vlbw() is true.
// Same operand pinning as string_compareLU, plus a mask-register temporary
// ktmp that is passed as the last argument to string_compare().
15312 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15313                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15314 %{
15315   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15316   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15317   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15318 
15319   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15320   ins_encode %{
15321     __ string_compare($str1$$Register, $str2$$Register,
15322                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15323                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15324   %}
15325   ins_pipe( pipe_slow );
15326 %}
15327 
// StrComp rule for the UL encoding, selected when
// VM_Version::supports_avx512vlbw() is false.
// Operand registers are swapped relative to the other StrComp rules:
// str1/cnt1 live in rsi/rdx and str2/cnt2 in rdi/rcx. The encoder also passes
// str2/cnt2 ahead of str1/cnt1, so string_compare() still receives rdi/rcx as
// its first string/count pair.
// All four inputs are USE_KILL, tmp1 is an XMM temporary, flags are killed,
// and knoreg is passed in place of a mask register.
15328 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15329                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15330 %{
15331   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15332   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15333   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15334 
15335   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15336   ins_encode %{
15337     __ string_compare($str2$$Register, $str1$$Register,
15338                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15339                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15340   %}
15341   ins_pipe( pipe_slow );
15342 %}
15343 
// StrComp rule for the UL encoding, selected when
// VM_Version::supports_avx512vlbw() is true.
// As in string_compareUL, str1/cnt1 live in rsi/rdx and str2/cnt2 in rdi/rcx,
// and the encoder passes str2/cnt2 ahead of str1/cnt1 to string_compare().
// A mask-register temporary ktmp is passed as the last argument.
15344 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15345                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15346 %{
15347   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15348   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15349   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15350 
15351   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15352   ins_encode %{
15353     __ string_compare($str2$$Register, $str1$$Register,
15354                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15355                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15356   %}
15357   ins_pipe( pipe_slow );
15358 %}
15359 
15360 // fast search of substring with known size.
// StrIndexOf rule for the LL encoding when the substring length is the
// immediate int_cnt2; requires UseSSE42Intrinsics.
// str1/cnt1 are pinned to rdi/rdx, str2 to rsi, and the result to rbx.
// cnt2 (rax) and tmp (rcx) are not match inputs: they are declared only to be
// KILLed and are handed to the assembler routine as registers.
// The encoder picks the routine from the constant: string_indexofC8() when
// the length is >= 16, otherwise string_indexof().
15361 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15362                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15363 %{
15364   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15365   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15366   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15367 
15368   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15369   ins_encode %{
15370     int icnt2 = (int)$int_cnt2$$constant;
15371     if (icnt2 >= 16) {
15372       // IndexOf for constant substrings with size >= 16 elements
15373       // which don't need to be loaded through stack.
15374       __ string_indexofC8($str1$$Register, $str2$$Register,
15375                           $cnt1$$Register, $cnt2$$Register,
15376                           icnt2, $result$$Register,
15377                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15378     } else {
15379       // Small strings are loaded through stack if they cross page boundary.
15380       __ string_indexof($str1$$Register, $str2$$Register,
15381                         $cnt1$$Register, $cnt2$$Register,
15382                         icnt2, $result$$Register,
15383                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15384     }
15385   %}
15386   ins_pipe( pipe_slow );
15387 %}
15388 
15389 // fast search of substring with known size.
// StrIndexOf rule for the UU encoding when the substring length is the
// immediate int_cnt2; requires UseSSE42Intrinsics.
// str1/cnt1 are pinned to rdi/rdx, str2 to rsi, and the result to rbx.
// cnt2 (rax) and tmp (rcx) are not match inputs: they are declared only to be
// KILLed and are handed to the assembler routine as registers.
// The encoder picks the routine from the constant: string_indexofC8() when
// the length is >= 8, otherwise string_indexof().
15390 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15391                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15392 %{
15393   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15394   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15395   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15396 
15397   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15398   ins_encode %{
15399     int icnt2 = (int)$int_cnt2$$constant;
15400     if (icnt2 >= 8) {
15401       // IndexOf for constant substrings with size >= 8 elements
15402       // which don't need to be loaded through stack.
15403       __ string_indexofC8($str1$$Register, $str2$$Register,
15404                           $cnt1$$Register, $cnt2$$Register,
15405                           icnt2, $result$$Register,
15406                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15407     } else {
15408       // Small strings are loaded through stack if they cross page boundary.
15409       __ string_indexof($str1$$Register, $str2$$Register,
15410                         $cnt1$$Register, $cnt2$$Register,
15411                         icnt2, $result$$Register,
15412                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15413     }
15414   %}
15415   ins_pipe( pipe_slow );
15416 %}
15417 
15418 // fast search of substring with known size.
// StrIndexOf rule for the UL encoding when the substring length is the
// immediate int_cnt2; requires UseSSE42Intrinsics.
// str1/cnt1 are pinned to rdi/rdx, str2 to rsi, and the result to rbx.
// cnt2 (rax) and tmp (rcx) are not match inputs: they are declared only to be
// KILLed and are handed to the assembler routine as registers.
// The encoder picks the routine from the constant: string_indexofC8() when
// the length is >= 8, otherwise string_indexof().
15419 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15420                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15421 %{
15422   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15423   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15424   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15425 
15426   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15427   ins_encode %{
15428     int icnt2 = (int)$int_cnt2$$constant;
15429     if (icnt2 >= 8) {
15430       // IndexOf for constant substrings with size >= 8 elements
15431       // which don't need to be loaded through stack.
15432       __ string_indexofC8($str1$$Register, $str2$$Register,
15433                           $cnt1$$Register, $cnt2$$Register,
15434                           icnt2, $result$$Register,
15435                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15436     } else {
15437       // Small strings are loaded through stack if they cross page boundary.
15438       __ string_indexof($str1$$Register, $str2$$Register,
15439                         $cnt1$$Register, $cnt2$$Register,
15440                         icnt2, $result$$Register,
15441                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15442     }
15443   %}
15444   ins_pipe( pipe_slow );
15445 %}
15446 
// StrIndexOf rule for the LL encoding with the substring length in a
// register; requires UseSSE42Intrinsics.
// str1/cnt1 are pinned to rdi/rdx, str2/cnt2 to rsi/rax, and the result to
// rbx. All four inputs are USE_KILL, tmp_vec is an XMM temporary, and
// tmp (rcx) and the flags are killed.
// (-1) is passed where the constant-length rules pass icnt2 -- presumably a
// "length not known at compile time" marker; confirm against
// string_indexof().
15447 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15448                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15449 %{
15450   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15451   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15452   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15453 
15454   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15455   ins_encode %{
15456     __ string_indexof($str1$$Register, $str2$$Register,
15457                       $cnt1$$Register, $cnt2$$Register,
15458                       (-1), $result$$Register,
15459                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15460   %}
15461   ins_pipe( pipe_slow );
15462 %}
15463 
15464 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15465                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15466 %{
15467   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15468   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15469   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15470 
15471   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15472   ins_encode %{
15473     __ string_indexof($str1$$Register, $str2$$Register,
15474                       $cnt1$$Register, $cnt2$$Register,
15475                       (-1), $result$$Register,
15476                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15477   %}
15478   ins_pipe( pipe_slow );
15479 %}
15480 
15481 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15482                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15483 %{
15484   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15485   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15486   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15487 
15488   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15489   ins_encode %{
15490     __ string_indexof($str1$$Register, $str2$$Register,
15491                       $cnt1$$Register, $cnt2$$Register,
15492                       (-1), $result$$Register,
15493                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15494   %}
15495   ins_pipe( pipe_slow );
15496 %}
15497 
15498 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15499                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15500 %{
15501   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15502   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15503   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15504   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15505   ins_encode %{
15506     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15507                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15508   %}
15509   ins_pipe( pipe_slow );
15510 %}
15511 
15512 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15513                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15514 %{
15515   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15516   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15517   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15518   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15519   ins_encode %{
15520     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15521                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15522   %}
15523   ins_pipe( pipe_slow );
15524 %}
15525 
15526 // fast string equals
15527 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15528                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15529 %{
15530   predicate(!VM_Version::supports_avx512vlbw());
15531   match(Set result (StrEquals (Binary str1 str2) cnt));
15532   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15533 
15534   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15535   ins_encode %{
15536     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15537                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15538                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15539   %}
15540   ins_pipe( pipe_slow );
15541 %}
15542 
15543 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15544                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15545 %{
15546   predicate(VM_Version::supports_avx512vlbw());
15547   match(Set result (StrEquals (Binary str1 str2) cnt));
15548   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15549 
15550   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15551   ins_encode %{
15552     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15553                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15554                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15555   %}
15556   ins_pipe( pipe_slow );
15557 %}
15558 
15559 // fast array equals
15560 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15561                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15562 %{
15563   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15564   match(Set result (AryEq ary1 ary2));
15565   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15566 
15567   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15568   ins_encode %{
15569     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15570                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15571                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15572   %}
15573   ins_pipe( pipe_slow );
15574 %}
15575 
15576 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15577                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15578 %{
15579   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15580   match(Set result (AryEq ary1 ary2));
15581   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15582 
15583   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15584   ins_encode %{
15585     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15586                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15587                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15588   %}
15589   ins_pipe( pipe_slow );
15590 %}
15591 
15592 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15593                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15594 %{
15595   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15596   match(Set result (AryEq ary1 ary2));
15597   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15598 
15599   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15600   ins_encode %{
15601     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15602                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15603                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15604   %}
15605   ins_pipe( pipe_slow );
15606 %}
15607 
15608 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15609                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15610 %{
15611   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15612   match(Set result (AryEq ary1 ary2));
15613   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15614 
15615   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15616   ins_encode %{
15617     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15618                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15619                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15620   %}
15621   ins_pipe( pipe_slow );
15622 %}
15623 
15624 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15625                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15626                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15627                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15628                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15629 %{
15630   predicate(UseAVX >= 2);
15631   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15632   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15633          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15634          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15635          USE basic_type, KILL cr);
15636 
15637   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15638   ins_encode %{
15639     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15640                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15641                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15642                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15643                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15644                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15645                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15646   %}
15647   ins_pipe( pipe_slow );
15648 %}
15649 
15650 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15651                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr,)
15652 %{
15653   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15654   match(Set result (CountPositives ary1 len));
15655   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15656 
15657   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15658   ins_encode %{
15659     __ count_positives($ary1$$Register, $len$$Register,
15660                        $result$$Register, $tmp3$$Register,
15661                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15662   %}
15663   ins_pipe( pipe_slow );
15664 %}
15665 
15666 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15667                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr,)
15668 %{
15669   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15670   match(Set result (CountPositives ary1 len));
15671   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15672 
15673   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15674   ins_encode %{
15675     __ count_positives($ary1$$Register, $len$$Register,
15676                        $result$$Register, $tmp3$$Register,
15677                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15678   %}
15679   ins_pipe( pipe_slow );
15680 %}
15681 
15682 // fast char[] to byte[] compression
15683 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15684                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15685   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15686   match(Set result (StrCompressedCopy src (Binary dst len)));
15687   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15688          USE_KILL len, KILL tmp5, KILL cr);
15689 
15690   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15691   ins_encode %{
15692     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15693                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15694                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15695                            knoreg, knoreg);
15696   %}
15697   ins_pipe( pipe_slow );
15698 %}
15699 
15700 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15701                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15702   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15703   match(Set result (StrCompressedCopy src (Binary dst len)));
15704   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15705          USE_KILL len, KILL tmp5, KILL cr);
15706 
15707   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15708   ins_encode %{
15709     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15710                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15711                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15712                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15713   %}
15714   ins_pipe( pipe_slow );
15715 %}
15716 // fast byte[] to char[] inflation
15717 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15718                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15719   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15720   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15721   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15722 
15723   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15724   ins_encode %{
15725     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15726                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15727   %}
15728   ins_pipe( pipe_slow );
15729 %}
15730 
15731 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15732                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15733   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15734   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15735   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15736 
15737   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15738   ins_encode %{
15739     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15740                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15741   %}
15742   ins_pipe( pipe_slow );
15743 %}
15744 
15745 // encode char[] to byte[] in ISO_8859_1
15746 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15747                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15748                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15749   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15750   match(Set result (EncodeISOArray src (Binary dst len)));
15751   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15752 
15753   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15754   ins_encode %{
15755     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15756                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15757                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15758   %}
15759   ins_pipe( pipe_slow );
15760 %}
15761 
15762 // encode char[] to byte[] in ASCII
15763 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15764                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15765                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15766   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15767   match(Set result (EncodeISOArray src (Binary dst len)));
15768   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15769 
15770   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15771   ins_encode %{
15772     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15773                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15774                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15775   %}
15776   ins_pipe( pipe_slow );
15777 %}
15778 
15779 //----------Overflow Math Instructions-----------------------------------------
15780 
15781 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15782 %{
15783   match(Set cr (OverflowAddI op1 op2));
15784   effect(DEF cr, USE_KILL op1, USE op2);
15785 
15786   format %{ "addl    $op1, $op2\t# overflow check int" %}
15787 
15788   ins_encode %{
15789     __ addl($op1$$Register, $op2$$Register);
15790   %}
15791   ins_pipe(ialu_reg_reg);
15792 %}
15793 
15794 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15795 %{
15796   match(Set cr (OverflowAddI op1 op2));
15797   effect(DEF cr, USE_KILL op1, USE op2);
15798 
15799   format %{ "addl    $op1, $op2\t# overflow check int" %}
15800 
15801   ins_encode %{
15802     __ addl($op1$$Register, $op2$$constant);
15803   %}
15804   ins_pipe(ialu_reg_reg);
15805 %}
15806 
15807 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15808 %{
15809   match(Set cr (OverflowAddL op1 op2));
15810   effect(DEF cr, USE_KILL op1, USE op2);
15811 
15812   format %{ "addq    $op1, $op2\t# overflow check long" %}
15813   ins_encode %{
15814     __ addq($op1$$Register, $op2$$Register);
15815   %}
15816   ins_pipe(ialu_reg_reg);
15817 %}
15818 
15819 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15820 %{
15821   match(Set cr (OverflowAddL op1 op2));
15822   effect(DEF cr, USE_KILL op1, USE op2);
15823 
15824   format %{ "addq    $op1, $op2\t# overflow check long" %}
15825   ins_encode %{
15826     __ addq($op1$$Register, $op2$$constant);
15827   %}
15828   ins_pipe(ialu_reg_reg);
15829 %}
15830 
15831 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15832 %{
15833   match(Set cr (OverflowSubI op1 op2));
15834 
15835   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15836   ins_encode %{
15837     __ cmpl($op1$$Register, $op2$$Register);
15838   %}
15839   ins_pipe(ialu_reg_reg);
15840 %}
15841 
15842 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15843 %{
15844   match(Set cr (OverflowSubI op1 op2));
15845 
15846   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15847   ins_encode %{
15848     __ cmpl($op1$$Register, $op2$$constant);
15849   %}
15850   ins_pipe(ialu_reg_reg);
15851 %}
15852 
15853 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15854 %{
15855   match(Set cr (OverflowSubL op1 op2));
15856 
15857   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15858   ins_encode %{
15859     __ cmpq($op1$$Register, $op2$$Register);
15860   %}
15861   ins_pipe(ialu_reg_reg);
15862 %}
15863 
15864 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15865 %{
15866   match(Set cr (OverflowSubL op1 op2));
15867 
15868   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15869   ins_encode %{
15870     __ cmpq($op1$$Register, $op2$$constant);
15871   %}
15872   ins_pipe(ialu_reg_reg);
15873 %}
15874 
15875 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15876 %{
15877   match(Set cr (OverflowSubI zero op2));
15878   effect(DEF cr, USE_KILL op2);
15879 
15880   format %{ "negl    $op2\t# overflow check int" %}
15881   ins_encode %{
15882     __ negl($op2$$Register);
15883   %}
15884   ins_pipe(ialu_reg_reg);
15885 %}
15886 
15887 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15888 %{
15889   match(Set cr (OverflowSubL zero op2));
15890   effect(DEF cr, USE_KILL op2);
15891 
15892   format %{ "negq    $op2\t# overflow check long" %}
15893   ins_encode %{
15894     __ negq($op2$$Register);
15895   %}
15896   ins_pipe(ialu_reg_reg);
15897 %}
15898 
15899 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15900 %{
15901   match(Set cr (OverflowMulI op1 op2));
15902   effect(DEF cr, USE_KILL op1, USE op2);
15903 
15904   format %{ "imull    $op1, $op2\t# overflow check int" %}
15905   ins_encode %{
15906     __ imull($op1$$Register, $op2$$Register);
15907   %}
15908   ins_pipe(ialu_reg_reg_alu0);
15909 %}
15910 
15911 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15912 %{
15913   match(Set cr (OverflowMulI op1 op2));
15914   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15915 
15916   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
15917   ins_encode %{
15918     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15919   %}
15920   ins_pipe(ialu_reg_reg_alu0);
15921 %}
15922 
15923 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15924 %{
15925   match(Set cr (OverflowMulL op1 op2));
15926   effect(DEF cr, USE_KILL op1, USE op2);
15927 
15928   format %{ "imulq    $op1, $op2\t# overflow check long" %}
15929   ins_encode %{
15930     __ imulq($op1$$Register, $op2$$Register);
15931   %}
15932   ins_pipe(ialu_reg_reg_alu0);
15933 %}
15934 
15935 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15936 %{
15937   match(Set cr (OverflowMulL op1 op2));
15938   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15939 
15940   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
15941   ins_encode %{
15942     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15943   %}
15944   ins_pipe(ialu_reg_reg_alu0);
15945 %}
15946 
15947 
15948 //----------Control Flow Instructions------------------------------------------
15949 // Signed compare Instructions
15950 
15951 // XXX more variants!!
15952 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15953 %{
15954   match(Set cr (CmpI op1 op2));
15955   effect(DEF cr, USE op1, USE op2);
15956 
15957   format %{ "cmpl    $op1, $op2" %}
15958   ins_encode %{
15959     __ cmpl($op1$$Register, $op2$$Register);
15960   %}
15961   ins_pipe(ialu_cr_reg_reg);
15962 %}
15963 
15964 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15965 %{
15966   match(Set cr (CmpI op1 op2));
15967 
15968   format %{ "cmpl    $op1, $op2" %}
15969   ins_encode %{
15970     __ cmpl($op1$$Register, $op2$$constant);
15971   %}
15972   ins_pipe(ialu_cr_reg_imm);
15973 %}
15974 
15975 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
15976 %{
15977   match(Set cr (CmpI op1 (LoadI op2)));
15978 
15979   ins_cost(500); // XXX
15980   format %{ "cmpl    $op1, $op2" %}
15981   ins_encode %{
15982     __ cmpl($op1$$Register, $op2$$Address);
15983   %}
15984   ins_pipe(ialu_cr_reg_mem);
15985 %}
15986 
15987 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
15988 %{
15989   match(Set cr (CmpI src zero));
15990 
15991   format %{ "testl   $src, $src" %}
15992   ins_encode %{
15993     __ testl($src$$Register, $src$$Register);
15994   %}
15995   ins_pipe(ialu_cr_reg_imm);
15996 %}
15997 
15998 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
15999 %{
16000   match(Set cr (CmpI (AndI src con) zero));
16001 
16002   format %{ "testl   $src, $con" %}
16003   ins_encode %{
16004     __ testl($src$$Register, $con$$constant);
16005   %}
16006   ins_pipe(ialu_cr_reg_imm);
16007 %}
16008 
16009 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16010 %{
16011   match(Set cr (CmpI (AndI src1 src2) zero));
16012 
16013   format %{ "testl   $src1, $src2" %}
16014   ins_encode %{
16015     __ testl($src1$$Register, $src2$$Register);
16016   %}
16017   ins_pipe(ialu_cr_reg_imm);
16018 %}
16019 
16020 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16021 %{
16022   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16023 
16024   format %{ "testl   $src, $mem" %}
16025   ins_encode %{
16026     __ testl($src$$Register, $mem$$Address);
16027   %}
16028   ins_pipe(ialu_cr_reg_mem);
16029 %}
16030 
16031 // Unsigned compare Instructions; really, same as signed except they
16032 // produce an rFlagsRegU instead of rFlagsReg.
16033 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16034 %{
16035   match(Set cr (CmpU op1 op2));
16036 
16037   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16038   ins_encode %{
16039     __ cmpl($op1$$Register, $op2$$Register);
16040   %}
16041   ins_pipe(ialu_cr_reg_reg);
16042 %}
16043 
16044 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16045 %{
16046   match(Set cr (CmpU op1 op2));
16047 
16048   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16049   ins_encode %{
16050     __ cmpl($op1$$Register, $op2$$constant);
16051   %}
16052   ins_pipe(ialu_cr_reg_imm);
16053 %}
16054 
16055 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16056 %{
16057   match(Set cr (CmpU op1 (LoadI op2)));
16058 
16059   ins_cost(500); // XXX
16060   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16061   ins_encode %{
16062     __ cmpl($op1$$Register, $op2$$Address);
16063   %}
16064   ins_pipe(ialu_cr_reg_mem);
16065 %}
16066 
16067 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16068 %{
16069   match(Set cr (CmpU src zero));
16070 
16071   format %{ "testl   $src, $src\t# unsigned" %}
16072   ins_encode %{
16073     __ testl($src$$Register, $src$$Register);
16074   %}
16075   ins_pipe(ialu_cr_reg_imm);
16076 %}
16077 
16078 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16079 %{
16080   match(Set cr (CmpP op1 op2));
16081 
16082   format %{ "cmpq    $op1, $op2\t# ptr" %}
16083   ins_encode %{
16084     __ cmpq($op1$$Register, $op2$$Register);
16085   %}
16086   ins_pipe(ialu_cr_reg_reg);
16087 %}
16088 
16089 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16090 %{
16091   match(Set cr (CmpP op1 (LoadP op2)));
16092   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16093 
16094   ins_cost(500); // XXX
16095   format %{ "cmpq    $op1, $op2\t# ptr" %}
16096   ins_encode %{
16097     __ cmpq($op1$$Register, $op2$$Address);
16098   %}
16099   ins_pipe(ialu_cr_reg_mem);
16100 %}
16101 
16102 // XXX this is generalized by compP_rReg_mem???
16103 // Compare raw pointer (used in out-of-heap check).
16104 // Only works because non-oop pointers must be raw pointers
16105 // and raw pointers have no anti-dependencies.
16106 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16107 %{
16108   predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
16109             n->in(2)->as_Load()->barrier_data() == 0);
16110   match(Set cr (CmpP op1 (LoadP op2)));
16111 
16112   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16113   ins_encode %{
16114     __ cmpq($op1$$Register, $op2$$Address);
16115   %}
16116   ins_pipe(ialu_cr_reg_mem);
16117 %}
16118 
16119 // This will generate a signed flags result. This should be OK since
16120 // any compare to a zero should be eq/neq.
16121 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16122 %{
16123   match(Set cr (CmpP src zero));
16124 
16125   format %{ "testq   $src, $src\t# ptr" %}
16126   ins_encode %{
16127     __ testq($src$$Register, $src$$Register);
16128   %}
16129   ins_pipe(ialu_cr_reg_imm);
16130 %}
16131 
16132 // This will generate a signed flags result. This should be OK since
16133 // any compare to a zero should be eq/neq.
16134 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16135 %{
16136   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16137             n->in(1)->as_Load()->barrier_data() == 0);
16138   match(Set cr (CmpP (LoadP op) zero));
16139 
16140   ins_cost(500); // XXX
16141   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16142   ins_encode %{
16143     __ testq($op$$Address, 0xFFFFFFFF);
16144   %}
16145   ins_pipe(ialu_cr_reg_imm);
16146 %}
16147 
16148 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16149 %{
16150   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16151             n->in(1)->as_Load()->barrier_data() == 0);
16152   match(Set cr (CmpP (LoadP mem) zero));
16153 
16154   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16155   ins_encode %{
16156     __ cmpq(r12, $mem$$Address);
16157   %}
16158   ins_pipe(ialu_cr_reg_mem);
16159 %}
16160 
16161 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16162 %{
16163   match(Set cr (CmpN op1 op2));
16164 
16165   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16166   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16167   ins_pipe(ialu_cr_reg_reg);
16168 %}
16169 
16170 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16171 %{
16172   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16173   match(Set cr (CmpN src (LoadN mem)));
16174 
16175   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16176   ins_encode %{
16177     __ cmpl($src$$Register, $mem$$Address);
16178   %}
16179   ins_pipe(ialu_cr_reg_mem);
16180 %}
16181 
16182 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16183   match(Set cr (CmpN op1 op2));
16184 
16185   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16186   ins_encode %{
16187     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16188   %}
16189   ins_pipe(ialu_cr_reg_imm);
16190 %}
16191 
16192 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16193 %{
16194   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16195   match(Set cr (CmpN src (LoadN mem)));
16196 
16197   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16198   ins_encode %{
16199     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16200   %}
16201   ins_pipe(ialu_cr_reg_mem);
16202 %}
16203 
// Compare a compressed klass pointer register against a klass constant.
// The constant is handed to cmp_narrow_klass as a Klass*.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16213 
// Compare a compressed klass pointer in memory (LoadNKlass) against a klass
// constant.  Disabled when UseCompactObjectHeaders is set.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  match(Set cr (CmpN src (LoadNKlass mem)));
  predicate(!UseCompactObjectHeaders);

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16225 
// Null test of a compressed pointer register: testl of the register with
// itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16233 
// Null test of a compressed pointer loaded from memory:
// (CmpN (LoadN mem) zero).  Selected when the compressed-oops heap base is
// non-null and the load carries no GC barrier data.
//
// The encoding is a TEST of the 32-bit memory operand against an all-ones
// mask, as the format string states: ZF is set exactly when the loaded value
// is zero, which is what a compare against zero requires.  A CMP against
// 0xFFFFFFFF is not equivalent (it sets ZF only when the value is all ones).
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16247 
// Null test of a compressed pointer loaded from memory when the
// compressed-oops heap base is null and the load has no GC barrier data.
// Compares the memory operand against r12, which the format string labels
// as the (zero) heap base register.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  match(Set cr (CmpN (LoadN mem) zero));
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpl(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16260 
16261 // Yanked all unsigned pointer compare operations.
16262 // Pointer compares are done with CmpP which is already unsigned.
16263 
// Signed long compare, register against register (CmpL).
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
16274 
// Signed long compare, register against an immL32 constant (CmpL).
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16285 
// Signed long compare, register against a long loaded from memory (CmpL).
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16296 
// Long compare against zero: testq of the register with itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  ins_encode %{ __ testq($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16307 
// Folds (CmpL (AndL src con) zero) into a single testq of the register
// against an immL32 constant.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  ins_encode %{ __ testq($src$$Register, $con$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16318 
// Folds (CmpL (AndL src1 src2) zero) into a single register-register testq.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero) %{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq   $src1, $src2\t# long" %}
  ins_encode %{ __ testq($src1$$Register, $src2$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16329 
// Folds (CmpL (AndL src (LoadL mem)) zero) into a single testq of the
// register against the memory operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{ __ testq($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16340 
// Same shape as testL_reg_mem, but the register operand is a pointer that
// the ideal graph reinterprets as a long through CastP2X.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{ __ testq($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16351 
16352 // Manifest a CmpU result in an integer register.  Very painful.
16353 // This is the test to avoid.
// Three-way unsigned int compare (CmpU3) producing an int in dst.
// dst is preloaded with -1 and kept when src1 is below src2 (unsigned);
// otherwise setcc(notZero) overwrites it according to the "not equal" flag
// (per the format string: setne + movzbl, or setzune on APX).
// The flags register is clobbered.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setcc   $dst \t# emits setne + movzbl or setzune for APX\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    // movl does not modify the flags set by the compare above.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16375 
16376 // Manifest a CmpL result in an integer register.  Very painful.
16377 // This is the test to avoid.
// Three-way signed long compare (CmpL3) producing an int in dst.
// dst is preloaded with -1 and kept when src1 is less than src2 (signed);
// otherwise setcc(notZero) overwrites it according to the "not equal" flag
// (per the format string: setne + movzbl, or setzune on APX).
// The flags register is clobbered.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setcc   $dst \t# emits setne + movzbl or setzune for APX\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // movl does not modify the flags set by the compare above.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16399 
16400 // Manifest a CmpUL result in an integer register.  Very painful.
16401 // This is the test to avoid.
// Three-way unsigned long compare (CmpUL3) producing an int in dst.
// dst is preloaded with -1 and kept when src1 is below src2 (unsigned);
// otherwise setcc(notZero) overwrites it according to the "not equal" flag
// (per the format string: setne + movzbl, or setzune on APX).
// The flags register is clobbered.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setcc   $dst \t# emits setne + movzbl or setzune for APX\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // movl does not modify the flags set by the compare above.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16423 
16424 // Unsigned long compare Instructions; really, same as signed long except they
16425 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register against register (CmpUL); the result is
// produced in an rFlagsRegU operand.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
16436 
// Unsigned long compare, register against an immL32 constant (CmpUL).
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16447 
// Unsigned long compare, register against a long loaded from memory (CmpUL).
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) %{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16458 
// Unsigned long compare against zero: testq of the register with itself.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  ins_encode %{ __ testq($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16469 
// Compare a signed byte in memory against an immI8 constant with a single
// cmpb, folding the LoadB into the compare.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) %{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb    $mem, $imm" %}
  ins_encode %{
    __ cmpb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16479 
// Bit test of an unsigned byte in memory against an immU7 mask:
// (CmpI (AndI (LoadUB mem) imm) zero) becomes a single testb.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# ubyte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16489 
// Bit test of a signed byte in memory against an immI8 mask:
// (CmpI (AndI (LoadB mem) imm) zero) becomes a single testb.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# byte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16499 
16500 //----------Max and Min--------------------------------------------------------
16501 // Min Instructions
16502 
// Helper for the MinI expansion (non-APX): no match rule of its own.
// Conditionally moves src into dst when the incoming flags say "greater".
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{ __ cmovl(Assembler::greater, $dst$$Register, $src$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
16514 
// Helper for the MinI expansion when UseAPX is set: no match rule of its own.
// Three-operand conditional move on the "greater" condition; dst is a pure
// definition and both sources are only read.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{ __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
16526 
// MinI for the non-APX case, with dst as both input and result.
// Expands into compI_rReg followed by the cmovI_reg_g helper.
instruct minI_rReg(rRegI dst, rRegI src) %{
  match(Set dst (MinI dst src));
  predicate(!UseAPX);
  ins_cost(200);

  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
16539 
// MinI when UseAPX is set: separate destination and two sources.
// Expands into compI_rReg followed by the cmovI_reg_g_ndd helper and is
// tagged with Flag_ndd_demotable_opr1.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2) %{
  match(Set dst (MinI src1 src2));
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);
  ins_cost(200);

  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}
16554 
// Max Instructions
//
// Helper for the MaxI expansion (non-APX): no match rule of its own.
// Conditionally moves src into dst when the incoming flags say "less".
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{ __ cmovl(Assembler::less, $dst$$Register, $src$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
16566 
// Helper for the MaxI expansion when UseAPX is set: no match rule of its own.
// Three-operand conditional move on the "less" condition; dst is a pure
// definition and both sources are only read.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{ __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
16578 
// MaxI for the non-APX case, with dst as both input and result.
// Expands into compI_rReg followed by the cmovI_reg_l helper.
instruct maxI_rReg(rRegI dst, rRegI src) %{
  match(Set dst (MaxI dst src));
  predicate(!UseAPX);
  ins_cost(200);

  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
16591 
// MaxI when UseAPX is set: separate destination and two sources.
// Expands into compI_rReg followed by the cmovI_reg_l_ndd helper and is
// tagged with Flag_ndd_demotable_opr1.
instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2) %{
  match(Set dst (MaxI src1 src2));
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);
  ins_cost(200);

  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}
16606 
16607 // ============================================================================
16608 // Branch Instructions
16609 
16610 // Jump Direct - Label defines a relative address from JMP+1
// Long form of the unconditional branch; declared size is 5 bytes.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  size(5);
  format %{ "jmp     $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jmp(*target, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}
16625 
16626 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Long form of the conditional branch for an If on signed flags; declared
// size is 6 bytes.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  size(6);
  format %{ "j$cop     $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16641 
// Jump Direct Conditional at a counted loop end - Label defines a relative address from Jcc+1
// Long form of the conditional branch that closes a counted loop
// (CountedLoopEnd); declared size is 6 bytes.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  size(6);
  format %{ "j$cop     $labl\t# loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16657 
16658 // Jump Direct Conditional - using unsigned comparison
// Long form of the conditional branch for an If on unsigned flags
// (rFlagsRegU with a cmpOpU condition); declared size is 6 bytes.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  size(6);
  format %{ "j$cop,u   $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16672 
// Long form of the conditional branch for rFlagsRegUCF flags with a cmpOpUCF
// condition; a single jcc, declared size 6 bytes.  Costed lower (200) than
// the generic unsigned form.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  size(6);
  format %{ "j$cop,u   $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16686 
// Long form of the conditional branch for rFlagsRegUCF flags with a
// cmpOpUCF2 condition.  Only equal / notEqual are handled, and each needs a
// second branch on the parity flag:
//   notEqual: branch to the target on parity, then on notEqual.
//   equal:    skip over the branch on parity, otherwise branch on equal.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u    $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u    done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        __ jcc(Assembler::parity, *target, false);
        __ jcc(Assembler::notEqual, *target, false);
        break;
      case Assembler::equal: {
        Label skip;
        __ jccb(Assembler::parity, skip);
        __ jcc(Assembler::equal, *target, false);
        __ bind(skip);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
16718 
16719 // Jump Direct Conditional - using signed and unsigned comparison
// Long form of the conditional branch for rFlagsRegUCFE flags with a
// cmpOpUCFE condition; a single jcc, declared size 6 bytes.
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  size(6);
  format %{ "j$cop,su   $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16733 
16734 // ============================================================================
16735 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
16736 // superklass array for an instance of the superklass.  Set a hidden
16737 // internal cache on a hit (cache is checked with exposed code in
16738 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16739 // encoding ALSO sets flags.
16740 
// Linear-scan version of the partial subtype check, used when
// UseSecondarySupersTable is off.  Operands are pinned to fixed registers
// (result in rdi, sub in rsi, super in rax); rcx and the flags are killed.
// On a hit the encoding falls through and zeroes the result register; on a
// miss control reaches the bound label without that xor.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
    "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Hit path only: report success as a zero result.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
16774 
16775 // ============================================================================
16776 // Two versions of hashtable-based partialSubtypeCheck, both used when
16777 // we need to search for a super class in the secondary supers array.
16778 // The first is used when we don't know _a priori_ the class being
16779 // searched for. The second, far more common, is used when we do know:
16780 // this is used for instanceof, checkcast, and any case where C2 can
16781 // determine it by constant propagation.
16782 
// Hashtable-based partial subtype check for a super class that is not a
// compile-time constant; used when UseSecondarySupersTable is on.
// Four fixed temporaries (rdx, rcx, rbx, r11) are reserved and the flags are
// killed; the lookup itself is delegated to
// lookup_secondary_supers_table_var.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr) %{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(1000);
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register,
                                         $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register,
                                         $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
16801 
// Hashtable-based partial subtype check for a super class known at compile
// time; used when UseSecondarySupersTable is on.  The constant's hash slot
// is computed while emitting code.  Depending on InlineSecondarySupersTest
// the lookup is either emitted inline or done by calling the per-slot stub.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr) %{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(700);  // lower than the other PartialSubtypeCheck versions
  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}

  ins_encode %{
    u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (!InlineSecondarySupersTest) {
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
    } else {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register,
                                             $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register,
                                             $result$$Register,
                                             super_klass_slot);
    }
  %}

  ins_pipe(pipe_slow);
%}
16826 
16827 // ============================================================================
16828 // Branch Instructions -- short offset versions
16829 //
16830 // These instructions are used to replace jumps of a long offset (the default
16831 // match) with jumps of a shorter offset.  These instructions are all tagged
16832 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16833 // match rules in general matching.  Instead, the ADLC generates a conversion
16834 // method in the MachNode which can be used to do in-place replacement of the
16835 // long variant with the shorter variant.  The compiler will determine if a
16836 // branch can be taken by the is_short_branch_offset() predicate in the machine
16837 // specific code section of the file.
16838 
16839 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir; declared size is 2 bytes.
instruct jmpDir_short(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  size(2);
  format %{ "jmp,s   $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
16854 
16855 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon; declared size is 2 bytes.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  size(2);
  format %{ "j$cop,s   $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16870 
// Jump Direct Conditional at a counted loop end - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd; declared size is 2 bytes.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  size(2);
  format %{ "j$cop,s   $labl\t# loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16886 
16887 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU; declared size is 2 bytes.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  size(2);
  format %{ "j$cop,us  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16902 
// Short-offset replacement for jmpConUCF; declared size is 2 bytes.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  size(2);
  format %{ "j$cop,us  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16917 
// Short-offset replacement for jmpConUCF2: two short branches, declared size
// 4 bytes.  Only equal / notEqual are handled:
//   notEqual: branch to the target on parity, then on notEqual.
//   equal:    skip over the branch on parity, otherwise branch on equal.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s  $labl\n\t"
      $$emit$$"j$cop,u,s  $labl"
    } else {
      $$emit$$"jp,u,s  done\n\t"
      $$emit$$"j$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        __ jccb(Assembler::parity, *target);
        __ jccb(Assembler::notEqual, *target);
        break;
      case Assembler::equal: {
        Label skip;
        __ jccb(Assembler::parity, skip);
        __ jccb(Assembler::equal, *target);
        __ bind(skip);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16951 
16952 // Jump Direct Conditional - using signed and unsigned comparison
// Short-offset replacement for jmpConUCFE; declared size is 2 bytes.
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  size(2);
  format %{ "j$cop,sus  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16967 
16968 // ============================================================================
16969 // inlined locking and unlocking
16970 
// Inlined monitor enter (FastLock); the outcome is reported in the flags.
// box is pinned to rbx and is both used and killed; rax and tmp are
// temporaries.  The work is delegated to fast_lock, which also receives
// r15_thread.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp)
%{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);

  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register,
                 $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16981 
// Inlined monitor exit (FastUnlock); the outcome is reported in the flags.
// The second FastUnlock input is pinned to rax and is both used and killed;
// tmp is a temporary.  The work is delegated to fast_unlock, which also
// receives r15_thread.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp)
%{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);

  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register,
                   $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16992 
16993 
16994 // ============================================================================
16995 // Safepoint Instructions
// Safepoint poll: a testl of rax against the word addressed by the poll
// register, preceded by a poll_type relocation.  Flags are killed.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  ins_cost(125);
  format %{ "testl   rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the poll instruction starts so its shape can be checked.
    address poll_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(poll_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17012 
// MaskAll from a long source into an opmask register (kReg); the mask length
// is taken from the node's vector length.
instruct mask_all_evexL(kReg dst, rRegL src)
%{
  match(Set dst (MaskAll src));

  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    const int vlen = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, vlen);
  %}
  ins_pipe(pipe_slow);
%}
17022 
// MaskAll from an int source for vector lengths above 32.  The int is first
// sign-extended into a long temporary (movslq), which then feeds the same
// mask-all operation as the long variant.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp)
%{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP tmp);

  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    const int vlen = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, vlen);
  %}
  ins_pipe(pipe_slow);
%}
17035 
17036 // ============================================================================
17037 // Procedure Call/Return Instructions
17038 // Call Java Static Instruction
17039 // Note: If this code changes, the corresponding ret_addr_offset() and
17040 //       compute_padding() functions will have to be adjusted.
// Encoding order: clear_avx, then the static call itself, then call_epilog.
// The instruction requests an alignment of 4.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17052 
17053 // Call Java Dynamic Instruction
17054 // Note: If this code changes, the corresponding ret_addr_offset() and
17055 //       compute_padding() functions will have to be adjusted.
// Encoding order: clear_avx, then the dynamic call itself, then call_epilog.
// Per the format string, rax is loaded with Universe::non_oop_word() ahead of
// the call.  The instruction requests an alignment of 4.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17068 
17069 // Call Runtime Instruction
// Matches CallRuntime; encoding is clear_avx followed by Java_To_Runtime.
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17080 
17081 // Call runtime without safepoint
// Matches CallLeaf; encoding is clear_avx followed by Java_To_Runtime.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17092 
17093 // Call runtime without safepoint and with vector arguments
// Matches CallLeafVector.  Unlike the other runtime call instructs in this
// section, the encoding is Java_To_Runtime alone, without clear_avx.
instruct CallLeafDirectVector(method meth) %{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17104 
17105 // Call runtime without safepoint
17106 // entry point is null, target holds the address to call
// Indirect CallLeafNoFP: selected when the call node has no entry point; the
// call goes through the target register.
instruct CallLeafNoFPInDirect(rRegP target) %{
  match(CallLeafNoFP target);
  predicate(n->as_Call()->entry_point() == nullptr);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{ __ call($target$$Register); %}

  ins_pipe(pipe_slow);
%}
17120 
17121 // Call runtime without safepoint
// Direct CallLeafNoFP: selected when the call node has an entry point.
// Encoding is clear_avx followed by Java_To_Runtime.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  predicate(n->as_Call()->entry_point() != nullptr);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17133 
17134 // Return Instruction
17135 // Remove the return address & jump to it.
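// NOTE(review): this comment used to state that a nop is always emitted after
// the ret to leave room for safepoint patching, but the encoding below only
// calls ret(0) -- confirm whether that statement still applies.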
// Matches Return; the encoding is a single ret(0).
instruct Ret() %{
  match(Return);

  format %{ "ret" %}
  ins_encode %{ __ ret(0); %}
  ins_pipe(pipe_jmp);
%}
17148 
// Tail Call (a.k.a. 'interprocedural jump'): jump from a runtime stub into
// Java code.  The jump target will eventually return to our caller, so the
// return address is left in place (contrast with TailJump below, which
// removes it).
// 'jump_target' must not be rbp: a MachEpilogNode has already been emitted
// just above the TailCall and has reset rbp to the caller's state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr) %{
  match(TailCall jump_target method_ptr);

  format %{ "jmp     $jump_target\t# rbx holds method" %}
  ins_cost(300);
  ins_encode %{
    // method_ptr is pinned to rbx by its operand class; only the jump is emitted.
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17166 
// Tail Jump: discard the return address, then jump to the target.
// (TailCall above keeps the return address on the stack.)
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop) %{
  match(TailJump jump_target ex_oop);

  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_cost(300);
  ins_encode %{
    // The return address is popped into rdx and not used further here.
    __ popq(as_Register(RDX_enc));
    // ex_oop is pinned to rax by its operand class; nothing to move.
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17182 
// Forward exception: jump to the stub returned by
// StubRoutines::forward_exception_entry().
instruct ForwardExceptionjmp() %{
  match(ForwardException);

  format %{ "jmp     forward_exception_stub" %}
  ins_encode %{
    // No scratch register is supplied for the jump (noreg).
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()),
            noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17194 
// Create exception oop.
// The oop is produced by stack-crawling runtime code and is set up just
// before control reaches this handler, so it is already available here.
// This rule therefore emits no code: it only binds the value to rax.
instruct CreateException(rax_RegP ex_oop) %{
  match(Set ex_oop (CreateEx));

  // Zero-sized instruction with an empty encoding.
  size(0);
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
17208 
// Rethrow exception.
// The exception oop arrives in the first argument position; control is then
// transferred with a JUMP (not a call) to the rethrow stub.
instruct RethrowException() %{
  match(Rethrow);

  format %{ "jmp     rethrow_stub" %}
  ins_encode %{
    // Target is OptoRuntime::rethrow_stub(); no scratch register (noreg).
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()),
            noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17223 
17224 // ============================================================================
// The name of this rule is KNOWN by the ADLC and must not be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type for it.
// The thread-local pointer lives in r15, so the rule is zero-sized and has an
// empty encoding.
instruct tlsLoadP(r15_RegP dst)
%{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  format %{ "# TLS is in R15" %}
  size(0);
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
17237 
// Float add, non-AVX form (UseAVX == 0): dst = dst + src, both in XMM registers.
instruct addF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));

  ins_cost(150);
  format %{ "addss   $dst, $src" %}
  ins_encode %{
    // dst is the left input and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ addss(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17249 
// Float add, non-AVX form (UseAVX == 0): dst = dst + LoadF(src), with the
// load folded into the memory operand of addss.
instruct addF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));

  ins_cost(150);
  format %{ "addss   $dst, $src" %}
  ins_encode %{
    // dst is the left input and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ addss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17261 
// Float add, non-AVX form (UseAVX == 0): dst = dst + con, where the constant
// is read from the constant table.
instruct addF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));

  ins_cost(150);
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17272 
// Float add, AVX form (UseAVX > 0): dst = src1 + src2, three-operand vaddss.
instruct addF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  ins_cost(150);
  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17284 
// Float add, AVX form (UseAVX > 0): dst = src1 + LoadF(src2), with the load
// folded into the memory operand of vaddss.
instruct addF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  ins_cost(150);
  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17296 
// Float add, AVX form (UseAVX > 0): dst = src + con, where the constant is
// read from the constant table.
instruct addF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  ins_cost(150);
  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17308 
// Double add, non-AVX form (UseAVX == 0): dst = dst + src, both in XMM registers.
instruct addD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));

  ins_cost(150);
  format %{ "addsd   $dst, $src" %}
  ins_encode %{
    // dst is the left input and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ addsd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17320 
// Double add, non-AVX form (UseAVX == 0): dst = dst + LoadD(src), with the
// load folded into the memory operand of addsd.
instruct addD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));

  ins_cost(150);
  format %{ "addsd   $dst, $src" %}
  ins_encode %{
    // dst is the left input and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ addsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17332 
// Double add, non-AVX form (UseAVX == 0): dst = dst + con, where the constant
// is read from the constant table.
instruct addD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));

  ins_cost(150);
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17343 
// Double add, AVX form (UseAVX > 0): dst = src1 + src2, three-operand vaddsd.
instruct addD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  ins_cost(150);
  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17355 
// Double add, AVX form (UseAVX > 0): dst = src1 + LoadD(src2), with the load
// folded into the memory operand of vaddsd.
instruct addD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17367 
// Double add, AVX form (UseAVX > 0): dst = src + con, where the constant is
// read from the constant table.
instruct addD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  ins_cost(150);
  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17379 
// Float subtract, non-AVX form (UseAVX == 0): dst = dst - src, both in XMM registers.
instruct subF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));

  ins_cost(150);
  format %{ "subss   $dst, $src" %}
  ins_encode %{
    // dst is the minuend and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ subss(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17391 
// Float subtract, non-AVX form (UseAVX == 0): dst = dst - LoadF(src), with
// the load folded into the memory operand of subss.
instruct subF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));

  ins_cost(150);
  format %{ "subss   $dst, $src" %}
  ins_encode %{
    // dst is the minuend and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ subss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17403 
// Float subtract, non-AVX form (UseAVX == 0): dst = dst - con, where the
// constant is read from the constant table.
instruct subF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));

  ins_cost(150);
  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17414 
// Float subtract, AVX form (UseAVX > 0): dst = src1 - src2, three-operand vsubss.
instruct subF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  ins_cost(150);
  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17426 
// Float subtract, AVX form (UseAVX > 0): dst = src1 - LoadF(src2), with the
// load folded into the memory operand of vsubss.
instruct subF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  ins_cost(150);
  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17438 
// Float subtract, AVX form (UseAVX > 0): dst = src - con, where the constant
// is read from the constant table.
instruct subF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  ins_cost(150);
  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17450 
// Double subtract, non-AVX form (UseAVX == 0): dst = dst - src, both in XMM registers.
instruct subD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));

  ins_cost(150);
  format %{ "subsd   $dst, $src" %}
  ins_encode %{
    // dst is the minuend and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ subsd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17462 
// Double subtract, non-AVX form (UseAVX == 0): dst = dst - LoadD(src), with
// the load folded into the memory operand of subsd.
instruct subD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));

  ins_cost(150);
  format %{ "subsd   $dst, $src" %}
  ins_encode %{
    // dst is the minuend and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ subsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17474 
// Double subtract, non-AVX form (UseAVX == 0): dst = dst - con, where the
// constant is read from the constant table.
instruct subD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));

  ins_cost(150);
  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17485 
// Double subtract, AVX form (UseAVX > 0): dst = src1 - src2, three-operand vsubsd.
instruct subD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  ins_cost(150);
  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17497 
// Double subtract, AVX form (UseAVX > 0): dst = src1 - LoadD(src2), with the
// load folded into the memory operand of vsubsd.
instruct subD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17509 
// Double subtract, AVX form (UseAVX > 0): dst = src - con, where the constant
// is read from the constant table.
instruct subD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  ins_cost(150);
  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17521 
// Float multiply, non-AVX form (UseAVX == 0): dst = dst * src, both in XMM registers.
instruct mulF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));

  ins_cost(150);
  format %{ "mulss   $dst, $src" %}
  ins_encode %{
    // dst is the left input and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ mulss(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17533 
// Float multiply, non-AVX form (UseAVX == 0): dst = dst * LoadF(src), with
// the load folded into the memory operand of mulss.
instruct mulF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));

  ins_cost(150);
  format %{ "mulss   $dst, $src" %}
  ins_encode %{
    // dst is the left input and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ mulss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17545 
// Float multiply, non-AVX form (UseAVX == 0): dst = dst * con, where the
// constant is read from the constant table.
instruct mulF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));

  ins_cost(150);
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17556 
// Float multiply, AVX form (UseAVX > 0): dst = src1 * src2, three-operand vmulss.
instruct mulF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  ins_cost(150);
  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17568 
// Float multiply, AVX form (UseAVX > 0): dst = src1 * LoadF(src2), with the
// load folded into the memory operand of vmulss.
instruct mulF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  ins_cost(150);
  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17580 
// Float multiply, AVX form (UseAVX > 0): dst = src * con, where the constant
// is read from the constant table.
instruct mulF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  ins_cost(150);
  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17592 
// Double multiply, non-AVX form (UseAVX == 0): dst = dst * src, both in XMM registers.
instruct mulD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));

  ins_cost(150);
  format %{ "mulsd   $dst, $src" %}
  ins_encode %{
    // dst is the left input and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ mulsd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17604 
// Double multiply, non-AVX form (UseAVX == 0): dst = dst * LoadD(src), with
// the load folded into the memory operand of mulsd.
instruct mulD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));

  ins_cost(150);
  format %{ "mulsd   $dst, $src" %}
  ins_encode %{
    // dst is the left input and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ mulsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17616 
// Double multiply, non-AVX form (UseAVX == 0): dst = dst * con, where the
// constant is read from the constant table.
instruct mulD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));

  ins_cost(150);
  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17627 
// Double multiply, AVX form (UseAVX > 0): dst = src1 * src2, three-operand vmulsd.
instruct mulD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  ins_cost(150);
  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17639 
// Double multiply, AVX form (UseAVX > 0): dst = src1 * LoadD(src2), with the
// load folded into the memory operand of vmulsd.
instruct mulD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17651 
// Double multiply, AVX form (UseAVX > 0): dst = src * con, where the constant
// is read from the constant table.
instruct mulD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  ins_cost(150);
  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17663 
// Float divide, non-AVX form (UseAVX == 0): dst = dst / src, both in XMM registers.
instruct divF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));

  ins_cost(150);
  format %{ "divss   $dst, $src" %}
  ins_encode %{
    // dst is the dividend and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ divss(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17675 
// Float divide, non-AVX form (UseAVX == 0): dst = dst / LoadF(src), with the
// load folded into the memory operand of divss.
instruct divF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));

  ins_cost(150);
  format %{ "divss   $dst, $src" %}
  ins_encode %{
    // dst is the dividend and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ divss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17687 
// Float divide, non-AVX form (UseAVX == 0): dst = dst / con, where the
// constant is read from the constant table.
instruct divF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));

  ins_cost(150);
  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17698 
// Float divide, AVX form (UseAVX > 0): dst = src1 / src2, three-operand vdivss.
instruct divF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  ins_cost(150);
  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17710 
// Float divide, AVX form (UseAVX > 0): dst = src1 / LoadF(src2), with the
// load folded into the memory operand of vdivss.
instruct divF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  ins_cost(150);
  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17722 
// Float divide, AVX form (UseAVX > 0): dst = src / con, where the constant is
// read from the constant table.
instruct divF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  ins_cost(150);
  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17734 
// Double divide, non-AVX form (UseAVX == 0): dst = dst / src, both in XMM registers.
instruct divD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));

  ins_cost(150);
  format %{ "divsd   $dst, $src" %}
  ins_encode %{
    // dst is the dividend and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ divsd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17746 
// Double divide, non-AVX form (UseAVX == 0): dst = dst / LoadD(src), with the
// load folded into the memory operand of divsd.
instruct divD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));

  ins_cost(150);
  format %{ "divsd   $dst, $src" %}
  ins_encode %{
    // dst is the dividend and also receives the result.
    XMMRegister acc = $dst$$XMMRegister;
    __ divsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17758 
// Double divide, non-AVX form (UseAVX == 0): dst = dst / con, where the
// constant is read from the constant table.
instruct divD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));

  ins_cost(150);
  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister,
             $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17769 
// Double divide, AVX form (UseAVX > 0): dst = src1 / src2, three-operand vdivsd.
instruct divD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  ins_cost(150);
  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17781 
// Double divide, AVX form (UseAVX > 0): dst = src1 / LoadD(src2), with the
// load folded into the memory operand of vdivsd.
instruct divD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivsd($dst$$XMMRegister,
              $src1$$XMMRegister,
              $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17793 
// Double divide, AVX form (UseAVX > 0): dst = src / con, where the constant
// is read from the constant table.
instruct divD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  ins_cost(150);
  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vdivsd($dst$$XMMRegister,
              $src$$XMMRegister,
              $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17805 
// Float abs, non-AVX form (UseAVX == 0): masks dst in place with the
// float_signmask() constant.
instruct absF_reg(regF dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));

  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;  // input and result
    __ andps(val, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17816 
// Float abs, AVX form (UseAVX > 0): dst = src AND float_signmask(), using a
// 128-bit vandps.
instruct absF_reg_reg(vlRegF dst, vlRegF src)
%{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));

  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    // The vector length is fixed at 128 bits for this scalar operation.
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
17829 
// Double abs, non-AVX form (UseAVX == 0): masks dst in place with the
// double_signmask() constant.
instruct absD_reg(regD dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));

  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;  // input and result
    __ andpd(val, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17841 
// Double abs, AVX form (UseAVX > 0): dst = src AND double_signmask(), using a
// 128-bit vandpd.
instruct absD_reg_reg(vlRegD dst, vlRegD src)
%{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));

  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    // The vector length is fixed at 128 bits for this scalar operation.
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
17855 
// Float negate, non-AVX form (UseAVX == 0): XORs dst in place with the
// float_signflip() constant.
instruct negF_reg(regF dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));

  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;  // input and result
    __ xorps(val, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17866 
// Float negate, AVX form (UseAVX > 0): dst = negate(src) through the
// vnegatess helper, which is handed the float_signflip() constant.
instruct negF_reg_reg(vlRegF dst, vlRegF src)
%{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));

  format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    __ vnegatess($dst$$XMMRegister,
                 $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17878 
// Double negate, non-AVX form (UseAVX == 0): XORs dst in place with the
// double_signflip() constant.
instruct negD_reg(regD dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));

  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;  // input and result
    __ xorpd(val, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17890 
// Double negate, AVX form (UseAVX > 0): dst = negate(src) through the
// vnegatesd helper, which is handed the double_signflip() constant.
instruct negD_reg_reg(vlRegD dst, vlRegD src)
%{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));

  format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister,
                 $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17903 
// Float square root.
// For best performance sqrtss wants its destination register to be
// pre-initialized, so the only rule defined is the one where the input has
// already been loaded into dst (dst is both source and destination).
instruct sqrtF_reg(regF dst)
%{
  match(Set dst (SqrtF dst));

  format %{ "sqrtss  $dst, $dst" %}
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;  // input and result
    __ sqrtss(val, val);
  %}
  ins_pipe(pipe_slow);
%}
17914 
// Double square root.
// For best performance sqrtsd wants its destination register to be
// pre-initialized, so the only rule defined is the one where the input has
// already been loaded into dst (dst is both source and destination).
instruct sqrtD_reg(regD dst)
%{
  match(Set dst (SqrtD dst));

  format %{ "sqrtsd  $dst, $dst" %}
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;  // input and result
    __ sqrtsd(val, val);
  %}
  ins_pipe(pipe_slow);
%}
17925 
// ConvF2HF, register form: converts the float in src to a half-float value
// delivered in the integer register dst.  The work is delegated to the
// flt_to_flt16 helper, which needs one XMM temporary.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp)
%{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);

  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP" %}
  ins_cost(125);
  ins_encode %{
    __ flt_to_flt16($dst$$Register,
                    $src$$XMMRegister,
                    $tmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17936 
// ConvF2HF fused with a StoreC: converts src and stores the result straight
// to memory.  Requires UseAVX > 2 and AVX512VL.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp)
%{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  match(Set mem (StoreC mem (ConvF2HF src)));
  effect(TEMP ktmp, TEMP rtmp);

  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    Register  mask_bits = $rtmp$$Register;
    KRegister store_mask = $ktmp$$KRegister;

    // Build a mask register with only bit 0 set (presumably so that just the
    // lowest element is written -- confirm against evcvtps2ph's masking).
    __ movl(mask_bits, 0x1);
    __ kmovwl(store_mask, mask_bits);
    // Masked convert-and-store with immediate 0x04 at 128-bit vector length.
    __ evcvtps2ph($mem$$Address, store_mask, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
17949 
// VectorCastF2HF, register form: vcvtps2ph with immediate 0x04; the vector
// length encoding is derived from the source operand.
instruct vconvF2HF(vec dst, vec src)
%{
  match(Set dst (VectorCastF2HF src));

  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    int src_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, src_enc);
  %}
  ins_pipe(pipe_slow);
%}
17959 
// VectorCastF2HF fused with a StoreVector: vcvtps2ph writes directly to
// memory.  Only applies when the store covers at least 16 bytes.
instruct vconvF2HF_mem_reg(memory mem, vec src)
%{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));

  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    // Vector length encoding comes from the source operand.
    int src_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, src_enc);
  %}
  ins_pipe(pipe_slow);
%}
17970 
// ConvHF2F, register form: converts the half-float held in the integer
// register src to a float in dst via the flt16_to_flt helper.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src)
%{
  match(Set dst (ConvHF2F src));

  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    __ flt16_to_flt($dst$$XMMRegister,
                    $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
17979 
// VectorCastHF2F with the LoadVector folded in: vcvtph2ps reads its source
// directly from memory.
instruct vconvHF2F_reg_mem(vec dst, memory mem)
%{
  match(Set dst (VectorCastHF2F (LoadVector mem)));

  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    // Vector length encoding is taken from this node (the result).
    int node_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, node_enc);
  %}
  ins_pipe(pipe_slow);
%}
17989 
// VectorCastHF2F, register form: vcvtph2ps from src to dst.
instruct vconvHF2F(vec dst, vec src)
%{
  match(Set dst (VectorCastHF2F src));

  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    // Vector length encoding is taken from this node (the result).
    int node_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, node_enc);
  %}
  ins_pipe(pipe_slow);
%}
18000 
18001 // ---------------------------------------- VectorReinterpret ------------------------------------
// Mask-register reinterpret that needs no code: the result is a predicate
// mask with the same lane count as the input, so dst is reused in place.
instruct reinterpret_mask(kReg dst)
%{
  predicate(n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1)));
  match(Set dst (VectorReinterpret dst));

  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe(pipe_slow);
%}
18013 
// Mask reinterpret from a T_SHORT-element mask to a T_BYTE-element mask.
// Applies when UseAVX > 2, the lane counts differ, and both the node and its
// input are predicate masks.  The source mask is moved into the vector
// temporary with evpmovm2w and a byte mask is then read back from that
// vector with evpmovb2m.
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp)
%{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    // Both views must describe the same number of bytes.
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");

    int size_enc = vector_length_encoding(src_sz);
    XMMRegister lanes = $xtmp$$XMMRegister;
    __ evpmovm2w(lanes, $src$$KRegister, size_enc);
    __ evpmovb2m($dst$$KRegister, lanes, size_enc);
  %}
  ins_pipe(pipe_slow);
%}
18033 
// Mask reinterpret from a T_INT/T_FLOAT-element mask to a T_BYTE-element
// mask.  Applies when UseAVX > 2, the lane counts differ, and both the node
// and its input are predicate masks.  The source mask is moved into the
// vector temporary with evpmovm2d and a byte mask is then read back from
// that vector with evpmovb2m.
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp)
%{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    // Both views must describe the same number of bytes.
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");

    int size_enc = vector_length_encoding(src_sz);
    XMMRegister lanes = $xtmp$$XMMRegister;
    __ evpmovm2d(lanes, $src$$KRegister, size_enc);
    __ evpmovb2m($dst$$KRegister, lanes, size_enc);
  %}
  ins_pipe(pipe_slow);
%}
18054 
// Mask reinterpret from a T_LONG/T_DOUBLE-element mask to a T_BYTE-element
// mask.  Applies when UseAVX > 2, the lane counts differ, and both the node
// and its input are predicate masks.  The source mask is moved into the
// vector temporary with evpmovm2q and a byte mask is then read back from
// that vector with evpmovb2m.
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp)
%{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    // Both views must describe the same number of bytes.
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");

    int size_enc = vector_length_encoding(src_sz);
    XMMRegister lanes = $xtmp$$XMMRegister;
    __ evpmovm2q(lanes, $src$$KRegister, size_enc);
    __ evpmovb2m($dst$$KRegister, lanes, size_enc);
  %}
  ins_pipe(pipe_slow);
%}
18075 
// Vector reinterpret that needs no code: the result is not a predicate mask
// and has the same size in bytes as the input, so dst is reused in place.
instruct reinterpret(vec dst)
%{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorReinterpret dst));

  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe(pipe_slow);
%}
18087 
// Non-AVX widening reinterpret (source of 4 or 8 bytes, destination of at
// most 16 bytes). dst is loaded with a constant from vector_32_bit_mask() or
// vector_64_bit_mask() and then AND-ed with src.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n) > Matcher::vector_length_in_bytes(n->in(1)))); // dst > src
  match(Set dst (VectorReinterpret src));
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");

    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_bytes != 4) {
      // Only an 8-byte source is left.
      assert(src_bytes == 8, "");
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
    } else {
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
    }
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18110 
// AVX widening reinterpret for a 4-byte, non-mask source: dst receives src
// AND-ed with the constant at vector_32_bit_mask().
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src is 4 bytes
            (Matcher::vector_length_in_bytes(n) > Matcher::vector_length_in_bytes(n->in(1)))); // dst > src
  match(Set dst (VectorReinterpret src));
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    // The literal 0 is the vector length argument of vpand.
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}
18124 
18125 
// AVX widening reinterpret for non-mask sources larger than 4 bytes. The
// source is copied with a move whose width equals the source size in bytes.
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src is wider than 4 bytes
            (Matcher::vector_length_in_bytes(n) > Matcher::vector_length_in_bytes(n->in(1)))); // dst > src
  match(Set dst (VectorReinterpret src));
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_bytes == 8) {
      __ movq   ($dst$$XMMRegister, $src$$XMMRegister);
    } else if (src_bytes == 16) {
      __ movdqu ($dst$$XMMRegister, $src$$XMMRegister);
    } else if (src_bytes == 32) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18144 
// Narrowing reinterpret for non-mask vectors. The move instruction is chosen
// by the destination size in bytes (4, 8, 16 or 32).
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1))); // dst < src
  match(Set dst (VectorReinterpret src));
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    const int dst_bytes = Matcher::vector_length_in_bytes(this);
    if (dst_bytes == 4) {
      __ movfltz($dst$$XMMRegister, $src$$XMMRegister);
    } else if (dst_bytes == 8) {
      __ movq   ($dst$$XMMRegister, $src$$XMMRegister);
    } else if (dst_bytes == 16) {
      __ movdqu ($dst$$XMMRegister, $src$$XMMRegister);
    } else if (dst_bytes == 32) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18162 
18163 // ----------------------------------------------------------------------------------------------------
18164 
// Scalar double rounding with an immediate rounding mode (roundsd).
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  ins_cost(150);
  format %{ "roundsd $dst,$src" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    // Without AVX, dst is zeroed first whenever it is not the same register as src.
    if (UseAVX == 0) {
      if ($dst$$XMMRegister != $src$$XMMRegister) {
        __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
    __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}
18178 
// Scalar double rounding of a double constant; the operand is read from the
// constant table.
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  ins_cost(150);
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
18189 
// Packed double rounding, register source, for vectors of fewer than 8 lanes.
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18201 
// Packed double rounding, register source, for exactly 8 lanes; always
// encoded as a 512-bit vrndscalepd.
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV src rmode));
  predicate(Matcher::vector_length(n) == 8);
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister,
                   $src$$XMMRegister,
                   $rmode$$constant,
                   Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18212 
// Packed double rounding with the source folded in from a LoadVector, for
// vectors of fewer than 8 lanes.
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18224 
// Packed double rounding with the source folded in from a LoadVector, for
// exactly 8 lanes; always encoded as a 512-bit vrndscalepd.
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  predicate(Matcher::vector_length(n) == 8);
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister,
                   $mem$$Address,
                   $rmode$$constant,
                   Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18235 
// OnSpinWait is implemented with a single pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_cost(200);
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
18249 
// Double-precision fused multiply-add: c = a * b + c, with c serving as both
// the addend and the result register.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD  c (Binary a b)));
  ins_cost(150);
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ fmad($c$$XMMRegister,
            $a$$XMMRegister,
            $b$$XMMRegister,
            $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18261 
// Single-precision fused multiply-add: c = a * b + c, with c serving as both
// the addend and the result register.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF  c (Binary a b)));
  ins_cost(150);
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ fmaf($c$$XMMRegister,
            $a$$XMMRegister,
            $b$$XMMRegister,
            $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18273 
18274 // ====================VECTOR INSTRUCTIONS=====================================
18275 
18276 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// vec -> legVec placeholder move. It must never be emitted: the encoding
// traps with ShouldNotReachHere().
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    // Reaching the emitter with this node is a bug.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18285 
// legVec -> vec placeholder move. It must never be emitted: the encoding
// traps with ShouldNotReachHere().
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    // Reaching the emitter with this node is a bug.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18294 
18295 // ============================================================================
18296 
// Generic vector load: a single rule for every vector size. The element type
// and the size in bytes are handed to the load_vector macro.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  format %{ "load_vector $dst,$mem" %}
  ins_cost(125);
  ins_encode %{
    __ load_vector(Matcher::vector_element_basic_type(this),
                   $dst$$XMMRegister,
                   $mem$$Address,
                   Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}
18308 
// Generic vector store: a single rule for every vector size. The store
// instruction is chosen by the size in bytes of the source vector.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  format %{ "store_vector $mem,$src\n\t" %}
  ins_cost(145);
  ins_encode %{
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_bytes == 4) {
      __ movdl    ($mem$$Address, $src$$XMMRegister);
    } else if (src_bytes == 8) {
      __ movq     ($mem$$Address, $src$$XMMRegister);
    } else if (src_bytes == 16) {
      __ movdqu   ($mem$$Address, $src$$XMMRegister);
    } else if (src_bytes == 32) {
      __ vmovdqu  ($mem$$Address, $src$$XMMRegister);
    } else if (src_bytes == 64) {
      __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18326 
18327 // ---------------------------------------- Gather ------------------------------------
18328 
18329 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18330 
// Unmasked gather of non-subword elements for vectors of at most 32 bytes
// when AVX512VL is not available. A vector temporary is filled by comparing
// it with itself (vpcmpeqd) and passed to vgather as the mask operand.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    const int enc = vector_length_encoding(this);
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, enc);
    // Materialize the base address in a general register.
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
18347 
18348 
// Unmasked gather of non-subword elements using an opmask temporary. Applies
// when AVX512VL is available or the vector is 64 bytes long. The temporary
// opmask is initialized with kxnorwl of the register with itself and then
// passed to evgather.
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  // The format previously printed the literal text "ktmp"; it now prints the
  // allocated opmask register like the other operands.
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    // Materialize the base address in a general register.
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18364 
// Masked gather of non-subword elements. Applies when AVX512VL is available
// or the vector is 64 bytes long. dst is zeroed before the gather, and the
// caller's mask is copied to a temporary opmask so that it is not modified.
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  // The format previously printed the literal text "ktmp"; it now prints the
  // allocated opmask register like the other operands.
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18385 
// Unmasked gather of subword elements for vectors of at most 8 bytes. The
// work is delegated to the vgather8b macro.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    // Materialize the base address in a general register.
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather8b(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18399 
// Unmasked gather of subword elements for vectors longer than 8 bytes. The
// index base is copied to a temporary before it is handed to vgather_subword,
// and noreg is passed for the two mask-related register arguments.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int lane_cnt = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vgather_subword(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, noreg, $length$$Register, lane_cnt, enc);
  %}
  ins_pipe( pipe_slow );
%}
18417 
// Masked gather of subword elements for vectors of at most 8 bytes with an
// opmask register mask (requires AVX512BW). The opmask is copied into a
// general register and the mask index register is cleared before the
// vgather8b_masked macro runs.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    // Clear the mask index register.
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    // Copy the opmask bits into a general register.
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather8b_masked(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register,
                        $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18433 
// Masked gather of subword elements for vectors longer than 8 bytes with an
// opmask register mask (requires AVX512BW). The opmask is copied into a
// general register, the index base is copied to a temporary, and the work is
// delegated to vgather_subword.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int lane_cnt = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    // Clear the mask index register.
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    // Copy the opmask bits into a general register.
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather_subword(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $mask_idx$$Register, $length$$Register, lane_cnt, enc);
  %}
  ins_pipe( pipe_slow );
%}
18453 
// Masked gather of subword elements for vectors of at most 8 bytes with a
// vector mask (used when AVX512VL+BW is not available). The mask bits are
// extracted with vpmovmskb; for T_SHORT they are additionally filtered with
// pextl using the selector 0x55555555.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, enc);
    if (bt == T_SHORT) {
      // The mask index register briefly holds the pextl selector.
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    // Clear the mask index register before the gather.
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather8b_masked(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register,
                        $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18473 
// Masked gather of subword elements for vectors longer than 8 bytes with a
// vector mask (used when AVX512VL+BW is not available). The mask bits are
// extracted with vpmovmskb; for T_SHORT they are additionally filtered with
// pextl using the selector 0x55555555. The gather itself is delegated to
// vgather_subword.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int lane_cnt = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, enc);
    if (bt == T_SHORT) {
      // The mask index register briefly holds the pextl selector.
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    // Clear the mask index register before the gather.
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $mask_idx$$Register, $length$$Register, lane_cnt, enc);
  %}
  ins_pipe( pipe_slow );
%}
18497 
18498 // ====================Scatter=======================================
18499 
18500 // Scatter INT, LONG, FLOAT, DOUBLE
18501 
// Unmasked scatter of non-subword elements (AVX-512 only, source vector of at
// least 16 bytes). The temporary opmask is loaded from vector_all_bits_set()
// and passed to evscatter.
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  // The format previously named a fixed register "k2"; the temporary opmask is
  // an allocated operand, so it is now printed as such.
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    // Materialize the base address in a general register.
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18520 
// Masked scatter of non-subword elements (source vector of at least
// 16 bytes). The caller's mask is copied to a temporary opmask before it is
// passed to evscatter.
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this, $src);
    const int enc = vector_length_encoding(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
18538 
18539 // ====================REPLICATE=======================================
18540 
// Replicate a byte held in a general register into every lane of a T_BYTE
// vector. Three code shapes are used: an SSE shuffle sequence below AVX2,
// evpbroadcastb straight from the general register when the vector has 64
// lanes or AVX512VL+BW is available, and movdl + vpbroadcastb otherwise.
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this);
    if (UseAVX < 2) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (lanes >= 16) {
        assert(lanes == 16, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    } else {
      const int enc = vector_length_encoding(this);
      const bool evex_broadcast = (lanes == 64) || VM_Version::supports_avx512vlbw(); // AVX512VL for <512bit operands
      if (evex_broadcast) {
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb($dst$$XMMRegister, $src$$Register, enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18570 
// Replicate a byte loaded from memory into a T_BYTE vector (AVX2 and above);
// the LoadB is folded into vpbroadcastb.
instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18581 
18582 // ====================ReplicateS=======================================
18583 
// Replicate a short held in a general register into every lane of a T_SHORT
// vector. Three code shapes are used: an SSE shuffle sequence below AVX2,
// evpbroadcastw straight from the general register when the vector has 32
// lanes or AVX512VL+BW is available, and movdl + vpbroadcastw otherwise.
instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    if (UseAVX < 2) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (lanes >= 8) {
        assert(lanes == 8, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    } else {
      const bool evex_broadcast = (lanes == 32) || VM_Version::supports_avx512vlbw(); // AVX512VL for <512bit operands
      if (evex_broadcast) {
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
        __ evpbroadcastw($dst$$XMMRegister, $src$$Register, enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18611 
// Replicate an immH constant into a T_SHORT vector. The constant is first
// moved into a general register temporary and then broadcast from it with
// evpbroadcastw. The encoding asserts AVX512-FP16 support.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && elem_bt == T_SHORT, "");
    const int enc = vector_length_encoding(this);
    __ movl($rtmp$$Register, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18625 
// Replicate a value held in a regF register into a T_SHORT vector (requires
// AVX512-FP16). The value goes through a general register temporary (evmovw)
// and is then broadcast with evpbroadcastw.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ evmovw($rtmp$$Register, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18638 
// Replicate a short loaded from memory into a T_SHORT vector (AVX2 and
// above); the LoadS is folded into vpbroadcastw.
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18649 
18650 // ====================ReplicateI=======================================
18651 
// Replicate an int held in a general register into every lane of a T_INT
// vector. evpbroadcastd is used straight from the general register when the
// vector has 16 lanes or AVX512VL is available; otherwise the value is moved
// into dst with movdl and spread with vpbroadcastd (AVX2) or pshufd.
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    const uint lanes = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    const bool evex_broadcast = (lanes == 16) || VM_Version::supports_avx512vl(); // AVX512VL for <512bit operands
    if (evex_broadcast) {
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, enc);
    } else {
      // Both remaining variants start from the scalar in the low lane of dst.
      __ movdl($dst$$XMMRegister, $src$$Register);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, enc);
      } else {
        __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18671 
// Replicate an int loaded from memory into a T_INT vector. The LoadI is
// folded into vpbroadcastd (AVX2) or vbroadcastss (AVX); without AVX the
// value is loaded with movdl and spread with pshufd.
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx2()) {
      if (VM_Version::supports_avx()) {
        __ vbroadcastss($dst$$XMMRegister, $mem$$Address, enc);
      } else {
        __ movdl($dst$$XMMRegister, $mem$$Address);
        __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      }
    } else {
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
18689 
// Replicate an int immediate into a non-long integral vector by loading a
// pre-replicated constant from the constant table. The element count handed
// to vreplicate_imm corresponds to 4 bytes with AVX, 8 bytes with SSE3 only
// and 16 bytes otherwise, divided by the element size.
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    // The constant-table expression below is intentionally kept self-contained.
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                                   type2aelembytes(Matcher::vector_element_basic_type(this))));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_constant_vector(elem_bt, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18704 
// Replicate integer zero into a non-long integral vector by XOR-ing dst with
// itself. The vector-length-aware vpxor is used only when EVEX is supported
// without AVX512VL; every other configuration uses pxor.
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18720 
// Replicate the int constant -1 into a non-long integral vector using the
// vallones macro.
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    __ vallones($dst$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18731 
18732 // ====================ReplicateL=======================================
18733 
// Replicate a long (8 byte) scalar held in a general register into every lane
// of a T_LONG vector. evpbroadcastq is used straight from the general
// register when the vector has 8 lanes or AVX512VL is available; otherwise
// the value is moved into dst with movdq and spread with vpbroadcastq (AVX2)
// or punpcklqdq.
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    const bool evex_broadcast = (lanes == 8) || VM_Version::supports_avx512vl(); // AVX512VL for <512bit operands
    if (evex_broadcast) {
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, enc);
    } else {
      // Both remaining variants start from the scalar in the low lane of dst.
      __ movdq($dst$$XMMRegister, $src$$Register);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, enc);
      } else {
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18754 
// Broadcast a 64-bit value loaded from memory (LoadL) into a T_LONG vector.
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx2()) {
      if (VM_Version::supports_sse3()) {
        __ movddup($dst$$XMMRegister, $mem$$Address);
      } else {
        // Baseline path: load the quadword, then pair it with itself.
        __ movq($dst$$XMMRegister, $mem$$Address);
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    } else {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
18772 
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// The constant-table entry is built by vreplicate_imm(T_LONG, con, k), where k is
// 1 when SSE3 is supported and 2 otherwise (presumably the replication count).
// NOTE(review): the constantaddress argument is presumably expanded by adlc
// outside this encode body, so it should not refer to encode-block locals - confirm.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    // vlen is the size of this vector in bytes, not its lane count.
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
18785 
// Zero-fill a T_LONG vector: matches Replicate of the immL0 operand.
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // Two-operand pxor unless the target has EVEX without AVX512VL; in that
    // case the three-operand vpxor with the vector length encoding is used.
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18800 
// Fill a T_LONG vector from the immL_M1 constant operand via vallones.
instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    // Only dst and the length encoding are needed; con itself is not read.
    const int enc = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
18811 
18812 // ====================ReplicateF=======================================
18813 
// AVX variant (UseAVX > 0): broadcast a float register into a T_FLOAT vector.
instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    if (vlen > 4 && VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, enc); // reg-to-reg variant requires AVX2
    } else {
      // Either the vector fits in 128 bits, or AVX2 is missing and the
      // 128-bit result has to be copied into the upper half as well.
      const bool fill_upper_half = vlen > 4;
      if (fill_upper_half) {
        assert(vlen == 8, "sanity");
      }
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      if (fill_upper_half) {
        __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18833 
// SSE-only variant (UseAVX == 0): broadcast a float register into a T_FLOAT vector.
instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    // Shuffle control 0x00 selects source dword 0 for all four result dwords.
    const int select_dword0 = 0x00;
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, select_dword0);
  %}
  ins_pipe( pipe_slow );
%}
18843 
// AVX variant: broadcast a float loaded from memory (LoadF) into a T_FLOAT vector.
instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    // A single memory-source broadcast sized by the vector length encoding.
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18854 
// Replicate float scalar immediate to be vector by loading from const table.
// The constant-table entry is built by vreplicate_imm(T_FLOAT, con, k), where k is
// 1 with AVX, 2 with SSE3 but no AVX, and 4 otherwise (presumably the replication count).
// NOTE(review): the constantaddress argument is presumably expanded by adlc
// outside this encode body, so it should not refer to encode-block locals - confirm.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    // vlen is the size of this vector in bytes, not its lane count.
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
18868 
// Zero-fill a T_FLOAT vector: matches Replicate of the immF0 operand.
instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // vpxor with the length encoding is chosen for EVEX targets where
    // supports_avx512vldq() is false; every other target uses xorps.
    const bool use_vpxor = VM_Version::supports_evex() && !VM_Version::supports_avx512vldq();
    if (use_vpxor) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18883 
18884 // ====================ReplicateD=======================================
18885 
// Broadcast a double register into every lane of a T_DOUBLE vector (UseSSE >= 3).
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    if (vlen > 2 && VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, enc); // reg-to-reg variant requires AVX2
    } else {
      // Either two lanes are enough, or AVX2 is missing and the 128-bit
      // result has to be copied into the upper half as well.
      const bool fill_upper_half = vlen > 2;
      if (fill_upper_half) {
        assert(vlen == 4, "sanity");
      }
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      if (fill_upper_half) {
        __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18906 
// Pre-SSE3 variant (UseSSE below 3): broadcast a double register into a T_DOUBLE vector.
instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    // Shuffle control 0x44 picks source dwords 0,1,0,1, which repeats the
    // low quadword in both halves of dst.
    const int repeat_low_qword = 0x44;
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, repeat_low_qword);
  %}
  ins_pipe( pipe_slow );
%}
18916 
// Broadcast a double loaded from memory (LoadD) into a T_DOUBLE vector (UseSSE >= 3).
instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) < 4) {
      // Fewer than four lanes: movddup straight from memory.
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      const int enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
18931 
// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
// The constant-table entry is built by vreplicate_imm(T_DOUBLE, con, k), where k is
// 1 when SSE3 is supported and 2 otherwise (presumably the replication count).
// NOTE(review): the constantaddress argument is presumably expanded by adlc
// outside this encode body, so it should not refer to encode-block locals - confirm.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    // vlen is the size of this vector in bytes, not its lane count.
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
18944 
// Zero-fill a T_DOUBLE vector: matches Replicate of the immD0 operand.
instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // xorps is the default; the three-operand vpxor is used only for EVEX
    // targets where supports_avx512vldq() is false.
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vldq()) {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18959 
18960 // ====================VECTOR INSERT=======================================
18961 
// Insert an int-register scalar at constant lane idx of a vector shorter than
// 32 bytes. dst is both the input vector and the result (match uses Binary dst val).
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    // Preconditions checked in debug builds: SSE4 and at least an 8-byte vector.
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // The element type is handed to the insert helper together with the lane index.
    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18979 
// Insert an int-register scalar at constant lane idx of a 32-byte integral vector.
// The 128-bit lane that contains idx is extracted into vtmp, patched there, and
// then written back over the same lane of src to form dst.
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Number of elements in one 16-byte lane, and its base-2 logarithm.
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: element position inside its 128-bit lane.
    // y_idx: which of the two 128-bit lanes holds the element.
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19002 
// Insert an int-register scalar at constant lane idx of a 64-byte integral vector.
// Same extract / patch / re-insert scheme as the 32-byte rule, but with four
// 128-bit lanes and the 32x4 extract and insert forms.
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int per_lane = 16/type2aelembytes(elem_bt);
    const int lane_shift = log2(per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Low bits of idx address the element within a lane; the next two bits
    // pick one of the four 128-bit lanes.
    const uint in_lane = $idx$$constant & right_n_bits(lane_shift);
    const uint lane_no = ($idx$$constant >> lane_shift) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, lane_no);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, in_lane);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
19026 
// Insert a long register at constant lane idx of a two-element vector.
// dst is both the input vector and the result.
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // The lane index is used directly as the pinsrq immediate.
    const int lane = (int)$idx$$constant;
    __ pinsrq($dst$$XMMRegister, $val$$Register, lane);
  %}
  ins_pipe( pipe_slow );
%}
19040 
// Insert a long register at constant lane idx of a four-element T_LONG vector.
// The 128-bit lane that contains idx is extracted into vtmp, patched with
// vpinsrq, and written back over the same lane of src to form dst.
instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two longs per 128-bit lane: bit 0 of idx is the position inside the
    // lane (x_idx), bit 1 selects the lane (y_idx).
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19059 
// Insert a long register at constant lane idx of an eight-element T_LONG vector,
// using the extract / patch / re-insert scheme over four 128-bit lanes.
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Bit 0 of idx: position inside the 128-bit lane. Bits 2:1: lane number.
    const uint in_lane = $idx$$constant & right_n_bits(1);
    const uint lane_no = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, lane_no);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, in_lane);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
19077 
// Insert a float register at constant lane idx of a T_FLOAT vector with fewer
// than eight elements. dst is both the input vector and the result.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Keep the low two bits of idx and shift them into bits 5:4, the
    // destination-select field of the insertps immediate.
    const uint lane = $idx$$constant & right_n_bits(2);
    const uint imm8 = lane << 4;
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, imm8);
  %}
  ins_pipe( pipe_slow );
%}
19093 
// Insert a float register at constant lane idx of a T_FLOAT vector with eight
// or sixteen elements. The 128-bit lane that contains idx is extracted into
// vtmp, patched with vinsertps, and written back over the same lane of src.
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    // x_idx: float position inside its 128-bit lane; it is shifted into
    // bits 5:4 of the vinsertps immediate below.
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      // Two 128-bit lanes: one selector bit.
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      // Four 128-bit lanes: two selector bits.
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
19121 
// Insert a double register at constant lane idx of a two-element T_DOUBLE vector.
// The value is first moved into the general-purpose temp, because the
// insertion itself is done with pinsrq from a GPR.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    const int lane = (int)$idx$$constant;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, lane);
  %}
  ins_pipe( pipe_slow );
%}
19137 
// Insert a double register at constant lane idx of a four-element T_DOUBLE vector.
// The value is moved into the general-purpose temp; the 128-bit lane that
// contains idx is extracted into vtmp, patched with vpinsrq, and written back
// over the same lane of src to form dst.
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two doubles per 128-bit lane: bit 0 of idx is the position inside the
    // lane (x_idx), bit 1 selects the lane (y_idx).
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19157 
// Insert a double register at constant lane idx of an eight-element T_DOUBLE vector.
// The value is moved into the general-purpose temp; the 128-bit lane that
// contains idx is extracted into vtmp, patched with vpinsrq, and written back
// over the same lane of src to form dst.
// NOTE(review): idx is declared immI here while the other insert rules use
// immU8 - confirm whether the difference is intentional.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Bit 0 of idx: position inside the 128-bit lane (x_idx).
    // Bits 2:1 of idx: which of the four 128-bit lanes (y_idx).
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19176 
19177 // ====================REDUCTION ARITHMETIC=======================================
19178 
19179 // =======================Int Reduction==========================================
19180 
// Reduction over a T_INT vector (src2) with scalar input src1. One rule covers
// add, mul, and, or, xor, min, max, umin and umax; the ideal opcode of the
// matched node is passed to reduceI together with the lane count of src2.
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes = Matcher::vector_length(this, $src2);
    __ reduceI(ideal_op, lanes, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19201 
19202 // =======================Long Reduction==========================================
19203 
// Reduction over a T_LONG vector (src2) with scalar input src1, selected when
// AVX512DQ is not supported. Vector operands use the legVec class.
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The ideal opcode tells reduceL which of the matched operations to emit.
    const int ideal_op = this->ideal_Opcode();
    const int lanes = Matcher::vector_length(this, $src2);
    __ reduceL(ideal_op, lanes, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19224 
// Reduction over a T_LONG vector (src2) with scalar input src1, selected when
// AVX512DQ is supported. Same encoding as the non-AVX512DQ rule, but the
// vector operands use the vec class instead of legVec.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_lanes = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceL(ideal_op, num_lanes, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19245 
19246 // =======================Float Reduction==========================================
19247 
// Strictly ordered float add/mul reduction for vectors of at most four
// elements. dst is both the scalar input and the result.
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, lanes, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19261 
// Strictly ordered float add/mul reduction for eight-element vectors.
// dst is both the scalar input and the result; two vector temps are used.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, lanes, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19275 
// Strictly ordered float add/mul reduction for sixteen-element vectors.
// dst is both the scalar input and the result; vector operands use legVec.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_floats = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_floats, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19289 
19290 
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Float add/mul reduction without a strict ordering requirement, two-element
  // vectors. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the helper.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, lanes, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19307 
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Float add/mul reduction without a strict ordering requirement, four-element
  // vectors. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the helper.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, lanes, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19324 
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Float add/mul reduction without a strict ordering requirement, eight-element
  // vectors. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the helper.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_floats = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_floats, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19341 
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Float add/mul reduction without a strict ordering requirement, sixteen-element
  // vectors. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the helper.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_floats = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_floats, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19358 
19359 // =======================Double Reduction==========================================
19360 
// Strictly ordered double add/mul reduction for two-element vectors.
// dst is both the scalar input and the result.
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, lanes, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19374 
// Strictly ordered double add/mul reduction for four-element vectors.
// dst is both the scalar input and the result; two vector temps are used.
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, lanes, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19388 
// Strictly ordered double add/mul reduction for eight-element vectors.
// dst is both the scalar input and the result; vector operands use legVec.
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int num_doubles = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_doubles, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19402 
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Double add/mul reduction without a strict ordering requirement, two-element
  // vectors. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the helper.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, lanes, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19419 
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Double add/mul reduction of a 4-element vector for nodes that do NOT
  // require strict ordering. Intended for the VectorAPI, which permits
  // non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity; only src2 and one scratch vector are
  // passed to the assembler helper.
  // in(2) is the vector operand (src2).
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4);
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ unordered_reduce_fp(ideal_op, lanes, result, $src2$$XMMRegister, scratch);
  %}
  ins_pipe( pipe_slow );
%}
19436 
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Double add/mul reduction of an 8-element vector for nodes that do NOT
  // require strict ordering. Intended for the VectorAPI, which permits
  // non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity; src2 and two scratch vectors (all
  // legVec) are passed to the assembler helper.
  // in(2) is the vector operand (src2).
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8);
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister scratch_a = $vtmp1$$XMMRegister;
    XMMRegister scratch_b = $vtmp2$$XMMRegister;
    __ unordered_reduce_fp(ideal_op, lanes, $dst$$XMMRegister, $src2$$XMMRegister, scratch_a, scratch_b);
  %}
  ins_pipe( pipe_slow );
%}
19453 
19454 // =======================Byte Reduction==========================================
19455 
// Byte-element reduction used when AVX512BW is NOT available.
// Covers the Add, And, Or, Xor, Min, Max, UMin and UMax reduction nodes; the
// concrete operation is chosen at emit time from the ideal opcode.
// src1 is the scalar input, src2 the vector input (in(2)).
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    Register    result = $dst$$Register;
    Register    scalar = $src1$$Register;
    XMMRegister vin    = $src2$$XMMRegister;
    __ reduceB(ideal_op, lanes, result, scalar, vin, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19475 
// Byte-element reduction used when AVX512BW IS available.
// Matches the same set of reduction nodes and calls the same reduceB helper as
// the non-AVX512BW rule; the difference visible here is that the vector
// operands use the vec operand class instead of legVec.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister scratch_a = $vtmp1$$XMMRegister;
    XMMRegister scratch_b = $vtmp2$$XMMRegister;
    __ reduceB(ideal_op, lanes, $dst$$Register, $src1$$Register, $src2$$XMMRegister, scratch_a, scratch_b);
  %}
  ins_pipe( pipe_slow );
%}
19495 
19496 // =======================Short Reduction==========================================
19497 
// Short-element reduction.
// Covers Add, Mul, And, Or, Xor, Min, Max, UMin and UMax reduction nodes whose
// vector input (src2, in(2)) has T_SHORT elements. src1 is the scalar input.
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT);
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The ideal opcode tells reduceS which of the matched operations to emit.
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    Register result = $dst$$Register;
    Register scalar = $src1$$Register;
    __ reduceS(ideal_op, lanes, result, scalar, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19518 
19519 // =======================Mul Reduction==========================================
19520 
// Byte multiply reduction for vectors of at most 32 elements.
// src1 is the scalar input, src2 the vector input (in(2)); dst and both
// scratch vectors are declared TEMP.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    Register    product = $dst$$Register;
    Register    scalar  = $src1$$Register;
    XMMRegister vin     = $src2$$XMMRegister;
    __ mulreduceB(ideal_op, lanes, product, scalar, vin, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19534 
// Byte multiply reduction for vectors of exactly 64 elements.
// Same helper call as the rule for up to 32 elements; here the vector operands
// use the legVec operand class.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister scratch_a = $vtmp1$$XMMRegister;
    XMMRegister scratch_b = $vtmp2$$XMMRegister;
    __ mulreduceB(ideal_op, lanes, $dst$$Register, $src1$$Register, $src2$$XMMRegister, scratch_a, scratch_b);
  %}
  ins_pipe( pipe_slow );
%}
19548 
19549 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
// Float min/max reduction of a 2-element vector, without AVX10.2.
// Selected only when the scalar input src1 is the constant +Inf (for Min) or
// -Inf (for Max); src1 itself is not passed to the assembler helper.
// Flags are killed.
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister vin    = $src2$$XMMRegister;
    // NOTE(review): the 'false' argument presumably means dst carries no live
    // input value - confirm against reduceFloatMinMax.
    __ reduceFloatMinMax(ideal_op, lanes, false, result, vin, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19571 
// Float min/max reduction of a vector with four or more elements, without
// AVX10.2. Selected only when the scalar input src1 is the constant +Inf (for
// Min) or -Inf (for Max); src1 itself is not passed to the assembler helper.
// Takes one more scratch vector (xmm_0) than the 2-element rule. Flags are
// killed.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister vin    = $src2$$XMMRegister;
    // NOTE(review): the 'false' argument presumably means dst carries no live
    // input value - confirm against reduceFloatMinMax.
    __ reduceFloatMinMax(ideal_op, lanes, false, result, vin, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19592 
// Float min/max reduction of a 2-element vector, without AVX10.2, for the
// general case where the scalar input lives in a register: dst is matched as
// both the scalar input and the result. Flags are killed.
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src);
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister vin = $src$$XMMRegister;
    // NOTE(review): the 'true' argument presumably tells the helper that dst
    // holds a live input value - confirm against reduceFloatMinMax.
    __ reduceFloatMinMax(ideal_op, lanes, true, acc, vin, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19611 
19612 
// Float min/max reduction of a vector with four or more elements, without
// AVX10.2, for the general case where the scalar input lives in a register:
// dst is matched as both the scalar input and the result. Flags are killed.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src);
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister vin = $src$$XMMRegister;
    // NOTE(review): the 'true' argument presumably tells the helper that dst
    // holds a live input value - confirm against reduceFloatMinMax.
    __ reduceFloatMinMax(ideal_op, lanes, true, acc, vin, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19631 
// Float min/max reduction of a 2-element vector when AVX10.2 is supported.
// Selected only when the scalar input src1 is the constant +Inf (for Min) or
// -Inf (for Max). Needs a single scratch vector and does not kill flags.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp1$$XMMRegister;
    // The three scratch slots the non-AVX10.2 rules fill are passed as xnoreg.
    __ reduceFloatMinMax(ideal_op, lanes, false, result, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, scratch);
  %}
  ins_pipe( pipe_slow );
%}
19649 
// Float min/max reduction of a vector with four or more elements when AVX10.2
// is supported. Selected only when the scalar input src1 is the constant +Inf
// (for Min) or -Inf (for Max). Needs two scratch vectors and does not kill
// flags.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister scratch_a = $xtmp1$$XMMRegister;
    XMMRegister scratch_b = $xtmp2$$XMMRegister;
    // The three scratch slots the non-AVX10.2 rules fill are passed as xnoreg.
    __ reduceFloatMinMax(ideal_op, lanes, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, scratch_a, scratch_b);
  %}
  ins_pipe( pipe_slow );
%}
19667 
// Float min/max reduction of a 2-element vector when AVX10.2 is supported, for
// the general case where the scalar input lives in a register: dst is matched
// as both the scalar input and the result. One scratch vector; flags untouched.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src);
    XMMRegister acc     = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp1$$XMMRegister;
    // The three scratch slots the non-AVX10.2 rules fill are passed as xnoreg.
    __ reduceFloatMinMax(ideal_op, lanes, true, acc, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         scratch);
  %}
  ins_pipe( pipe_slow );
%}
19683 
// Float min/max reduction of a vector with four or more elements when AVX10.2
// is supported, for the general case where the scalar input lives in a
// register: dst is matched as both the scalar input and the result.
// Two scratch vectors are needed; flags are not killed.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  // The mnemonic is "minmaxF", not "minmax2F": this rule covers vectors of four
  // or more elements, while the 2-element case is minmax_reduction2F_av_avx10_2.
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    // The three scratch slots the non-AVX10.2 rules fill are passed as xnoreg.
    // NOTE(review): the 'true' argument presumably tells the helper that dst
    // holds a live input value - confirm against reduceFloatMinMax.
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19699 
//--------------------Min/Max Double Reduction --------------------
// Double min/max reduction of a 2-element vector, without AVX10.2.
// Selected only when the scalar input src1 is the constant +Inf (for Min) or
// -Inf (for Max); src1 itself is not passed to the assembler helper.
// Flags are killed.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister vin    = $src2$$XMMRegister;
    // NOTE(review): the 'false' argument presumably means dst carries no live
    // input value - confirm against reduceDoubleMinMax.
    __ reduceDoubleMinMax(ideal_op, lanes, false, result, vin,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19721 
// Double min/max reduction of a vector with four or more elements, without
// AVX10.2. Selected only when the scalar input src1 is the constant +Inf (for
// Min) or -Inf (for Max); src1 itself is not passed to the assembler helper.
// Takes one more scratch vector (tmp5) than the 2-element rule. Flags are
// killed.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister vin    = $src2$$XMMRegister;
    // NOTE(review): the 'false' argument presumably means dst carries no live
    // input value - confirm against reduceDoubleMinMax.
    __ reduceDoubleMinMax(ideal_op, lanes, false, result, vin,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19742 
19743 
// Double min/max reduction of a 2-element vector, without AVX10.2, for the
// general case where the scalar input lives in a register: dst is matched as
// both the scalar input and the result. Flags are killed.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src);
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister vin = $src$$XMMRegister;
    // NOTE(review): the 'true' argument presumably tells the helper that dst
    // holds a live input value - confirm against reduceDoubleMinMax.
    __ reduceDoubleMinMax(ideal_op, lanes, true, acc, vin,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19762 
// Double min/max reduction of a vector with four or more elements, without
// AVX10.2, for the general case where the scalar input lives in a register:
// dst is matched as both the scalar input and the result. Flags are killed.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src);
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister vin = $src$$XMMRegister;
    // NOTE(review): the 'true' argument presumably tells the helper that dst
    // holds a live input value - confirm against reduceDoubleMinMax.
    __ reduceDoubleMinMax(ideal_op, lanes, true, acc, vin,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19781 
// Double min/max reduction of a 2-element vector when AVX10.2 is supported.
// Selected only when the scalar input src1 is the constant +Inf (for Min) or
// -Inf (for Max). Needs a single scratch vector and does not kill flags.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp1$$XMMRegister;
    // The three scratch slots the non-AVX10.2 rules fill are passed as xnoreg.
    __ reduceDoubleMinMax(ideal_op, lanes, false, result, $src2$$XMMRegister, xnoreg,
                          xnoreg, xnoreg, scratch);
  %}
  ins_pipe( pipe_slow );
%}
19799 
// Double min/max reduction of a vector with four or more elements when AVX10.2
// is supported. Selected only when the scalar input src1 is the constant +Inf
// (for Min) or -Inf (for Max). Needs two scratch vectors and does not kill
// flags.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src2);
    XMMRegister scratch_a = $xtmp1$$XMMRegister;
    XMMRegister scratch_b = $xtmp2$$XMMRegister;
    // The three scratch slots the non-AVX10.2 rules fill are passed as xnoreg.
    __ reduceDoubleMinMax(ideal_op, lanes, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                          xnoreg, scratch_a, scratch_b);
  %}
  ins_pipe( pipe_slow );
%}
19817 
19818 
// Double min/max reduction of a 2-element vector when AVX10.2 is supported,
// for the general case where the scalar input lives in a register: dst is
// matched as both the scalar input and the result. One scratch vector; flags
// untouched.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src);
    XMMRegister acc     = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp1$$XMMRegister;
    // The three scratch slots the non-AVX10.2 rules fill are passed as xnoreg.
    __ reduceDoubleMinMax(ideal_op, lanes, true, acc, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, scratch);
  %}
  ins_pipe( pipe_slow );
%}
19834 
// Double min/max reduction of a vector with four or more elements when AVX10.2
// is supported, for the general case where the scalar input lives in a
// register: dst is matched as both the scalar input and the result.
// Two scratch vectors; flags untouched.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lanes    = Matcher::vector_length(this, $src);
    XMMRegister scratch_a = $xtmp1$$XMMRegister;
    XMMRegister scratch_b = $xtmp2$$XMMRegister;
    // The three scratch slots the non-AVX10.2 rules fill are passed as xnoreg.
    __ reduceDoubleMinMax(ideal_op, lanes, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, scratch_a, scratch_b);
  %}
  ins_pipe( pipe_slow );
%}
19850 
19851 // ====================VECTOR ARITHMETIC=======================================
19852 
19853 // --------------------------------- ADD --------------------------------------
19854 
19855 // Bytes vector add
// SSE form, selected only when AVX is disabled (UseAVX == 0). dst is both the
// first addend and the result, mirroring the two-operand paddb emitted below.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packedB" %}
  ins_encode %{
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ paddb(sum, addend);
  %}
  ins_pipe( pipe_slow );
%}
19865 
// AVX form (UseAVX > 0): three-operand vpaddb with a separate destination.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    // The vector-length encoding is derived from this node.
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum = $dst$$XMMRegister;
    __ vpaddb(sum, $src1$$XMMRegister, $src2$$XMMRegister, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
19876 
// AVX form with the second addend folded in from memory (LoadVector).
// Only selected when the first input vector is wider than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ vpaddb(sum, addend, $mem$$Address, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
19888 
19889 // Shorts/Chars vector add
// SSE form, selected only when AVX is disabled (UseAVX == 0). dst is both the
// first addend and the result of the two-operand paddw.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packedS" %}
  ins_encode %{
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ paddw(sum, addend);
  %}
  ins_pipe( pipe_slow );
%}
19899 
// AVX form (UseAVX > 0): three-operand vpaddw with a separate destination.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    // The vector-length encoding is derived from this node.
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum = $dst$$XMMRegister;
    __ vpaddw(sum, $src1$$XMMRegister, $src2$$XMMRegister, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
19910 
// AVX form with the second addend folded in from memory (LoadVector).
// Only selected when the first input vector is wider than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ vpaddw(sum, addend, $mem$$Address, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
19922 
19923 // Integers vector add
// SSE form, selected only when AVX is disabled (UseAVX == 0). dst is both the
// first addend and the result of the two-operand paddd.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packedI" %}
  ins_encode %{
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ paddd(sum, addend);
  %}
  ins_pipe( pipe_slow );
%}
19933 
// AVX form (UseAVX > 0): three-operand vpaddd with a separate destination.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    // The vector-length encoding is derived from this node.
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum = $dst$$XMMRegister;
    __ vpaddd(sum, $src1$$XMMRegister, $src2$$XMMRegister, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
19944 
19945 
// AVX form with the second addend folded in from memory (LoadVector).
// Only selected when the first input vector is wider than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ vpaddd(sum, addend, $mem$$Address, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
19957 
19958 // Longs vector add
// SSE form, selected only when AVX is disabled (UseAVX == 0). dst is both the
// first addend and the result of the two-operand paddq.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packedL" %}
  ins_encode %{
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ paddq(sum, addend);
  %}
  ins_pipe( pipe_slow );
%}
19968 
// AVX form (UseAVX > 0): three-operand vpaddq with a separate destination.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    // The vector-length encoding is derived from this node.
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum = $dst$$XMMRegister;
    __ vpaddq(sum, $src1$$XMMRegister, $src2$$XMMRegister, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
19979 
// AVX form with the second addend folded in from memory (LoadVector).
// Only selected when the first input vector is wider than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ vpaddq(sum, addend, $mem$$Address, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
19991 
19992 // Floats vector add
// SSE form, selected only when AVX is disabled (UseAVX == 0). dst is both the
// first addend and the result of the two-operand addps.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packedF" %}
  ins_encode %{
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ addps(sum, addend);
  %}
  ins_pipe( pipe_slow );
%}
20002 
// AVX form (UseAVX > 0): three-operand vaddps with a separate destination.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    // The vector-length encoding is derived from this node.
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum = $dst$$XMMRegister;
    __ vaddps(sum, $src1$$XMMRegister, $src2$$XMMRegister, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
20013 
// AVX form with the second addend folded in from memory (LoadVector).
// Only selected when the first input vector is wider than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ vaddps(sum, addend, $mem$$Address, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
20025 
20026 // Doubles vector add
// SSE form, selected only when AVX is disabled (UseAVX == 0). dst is both the
// first addend and the result of the two-operand addpd.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packedD" %}
  ins_encode %{
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ addpd(sum, addend);
  %}
  ins_pipe( pipe_slow );
%}
20036 
// AVX form (UseAVX > 0): three-operand vaddpd with a separate destination.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    // The vector-length encoding is derived from this node.
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum = $dst$$XMMRegister;
    __ vaddpd(sum, $src1$$XMMRegister, $src2$$XMMRegister, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
20047 
// AVX form with the second addend folded in from memory (LoadVector).
// Only selected when the first input vector is wider than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int avx_len = vector_length_encoding(this);
    XMMRegister sum    = $dst$$XMMRegister;
    XMMRegister addend = $src$$XMMRegister;
    __ vaddpd(sum, addend, $mem$$Address, avx_len);
  %}
  ins_pipe( pipe_slow );
%}
20059 
20060 // --------------------------------- SUB --------------------------------------
20061 
20062 // Bytes vector sub
// Non-AVX rule (UseAVX == 0) for SubVB: two-operand psubb, where $dst is both
// the minuend and the result.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packedB" %}
  ins_encode %{
    XMMRegister accum      = $dst$$XMMRegister;  // read and overwritten
    XMMRegister subtrahend = $src$$XMMRegister;
    __ psubb(accum, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20072 
// AVX rule for SubVB with both inputs in registers: emits a three-operand
// vpsubb computing $src1 - $src2 into $dst.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result     = $dst$$XMMRegister;
    XMMRegister minuend    = $src1$$XMMRegister;
    XMMRegister subtrahend = $src2$$XMMRegister;
    __ vpsubb(result, minuend, subtrahend, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20083 
// AVX rule for SubVB whose subtrahend is a LoadVector: the load is folded
// into vpsubb as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister minuend = $src$$XMMRegister;
    __ vpsubb(result, minuend, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20095 
20096 // Shorts/Chars vector sub
// Non-AVX rule (UseAVX == 0) for SubVS: two-operand psubw, where $dst is both
// the minuend and the result.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packedS" %}
  ins_encode %{
    XMMRegister accum      = $dst$$XMMRegister;  // read and overwritten
    XMMRegister subtrahend = $src$$XMMRegister;
    __ psubw(accum, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20106 
20107 
// AVX rule for SubVS with both inputs in registers: emits a three-operand
// vpsubw computing $src1 - $src2 into $dst.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result     = $dst$$XMMRegister;
    XMMRegister minuend    = $src1$$XMMRegister;
    XMMRegister subtrahend = $src2$$XMMRegister;
    __ vpsubw(result, minuend, subtrahend, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20118 
// AVX rule for SubVS whose subtrahend is a LoadVector: the load is folded
// into vpsubw as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister minuend = $src$$XMMRegister;
    __ vpsubw(result, minuend, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20130 
20131 // Integers vector sub
// Non-AVX rule (UseAVX == 0) for SubVI: two-operand psubd, where $dst is both
// the minuend and the result.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd   $dst,$src\t! sub packedI" %}
  ins_encode %{
    XMMRegister accum      = $dst$$XMMRegister;  // read and overwritten
    XMMRegister subtrahend = $src$$XMMRegister;
    __ psubd(accum, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20141 
// AVX rule for SubVI with both inputs in registers: emits a three-operand
// vpsubd computing $src1 - $src2 into $dst.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result     = $dst$$XMMRegister;
    XMMRegister minuend    = $src1$$XMMRegister;
    XMMRegister subtrahend = $src2$$XMMRegister;
    __ vpsubd(result, minuend, subtrahend, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20152 
// AVX rule for SubVI whose subtrahend is a LoadVector: the load is folded
// into vpsubd as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister minuend = $src$$XMMRegister;
    __ vpsubd(result, minuend, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20164 
20165 // Longs vector sub
// Non-AVX rule (UseAVX == 0) for SubVL: two-operand psubq, where $dst is both
// the minuend and the result.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq   $dst,$src\t! sub packedL" %}
  ins_encode %{
    XMMRegister accum      = $dst$$XMMRegister;  // read and overwritten
    XMMRegister subtrahend = $src$$XMMRegister;
    __ psubq(accum, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20175 
// AVX rule for SubVL with both inputs in registers: emits a three-operand
// vpsubq computing $src1 - $src2 into $dst.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result     = $dst$$XMMRegister;
    XMMRegister minuend    = $src1$$XMMRegister;
    XMMRegister subtrahend = $src2$$XMMRegister;
    __ vpsubq(result, minuend, subtrahend, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20186 
20187 
// AVX rule for SubVL whose subtrahend is a LoadVector: the load is folded
// into vpsubq as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister minuend = $src$$XMMRegister;
    __ vpsubq(result, minuend, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20199 
20200 // Floats vector sub
// Non-AVX rule (UseAVX == 0) for SubVF: two-operand subps, where $dst is both
// the minuend and the result.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps   $dst,$src\t! sub packedF" %}
  ins_encode %{
    XMMRegister accum      = $dst$$XMMRegister;  // read and overwritten
    XMMRegister subtrahend = $src$$XMMRegister;
    __ subps(accum, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20210 
// AVX rule for SubVF with both inputs in registers: emits a three-operand
// vsubps computing $src1 - $src2 into $dst.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result     = $dst$$XMMRegister;
    XMMRegister minuend    = $src1$$XMMRegister;
    XMMRegister subtrahend = $src2$$XMMRegister;
    __ vsubps(result, minuend, subtrahend, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20221 
// AVX rule for SubVF whose subtrahend is a LoadVector: the load is folded
// into vsubps as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister minuend = $src$$XMMRegister;
    __ vsubps(result, minuend, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20233 
20234 // Doubles vector sub
// Non-AVX rule (UseAVX == 0) for SubVD: two-operand subpd, where $dst is both
// the minuend and the result.
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd   $dst,$src\t! sub packedD" %}
  ins_encode %{
    XMMRegister accum      = $dst$$XMMRegister;  // read and overwritten
    XMMRegister subtrahend = $src$$XMMRegister;
    __ subpd(accum, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20244 
// AVX rule for SubVD with both inputs in registers: emits a three-operand
// vsubpd computing $src1 - $src2 into $dst.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result     = $dst$$XMMRegister;
    XMMRegister minuend    = $src1$$XMMRegister;
    XMMRegister subtrahend = $src2$$XMMRegister;
    __ vsubpd(result, minuend, subtrahend, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20255 
// AVX rule for SubVD whose subtrahend is a LoadVector: the load is folded
// into vsubpd as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister minuend = $src$$XMMRegister;
    __ vsubpd(result, minuend, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20267 
20268 // --------------------------------- MUL --------------------------------------
20269 
20270 // Byte vector mul
// MulVB for vectors of at most 8 bytes. The sequence shown here has no packed
// byte multiply; it widens bytes to 16-bit lanes, multiplies as words, and
// packs the low bytes back. $dst and $xtmp are TEMPs.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister product = $dst$$XMMRegister;
    XMMRegister widened = $xtmp$$XMMRegister;
    // Sign-extend the bytes of each operand into 16-bit lanes.
    __ pmovsxbw(product, $src1$$XMMRegister);
    __ pmovsxbw(widened, $src2$$XMMRegister);
    // Word-wise multiply, then zero the upper byte of every word ...
    __ pmullw(product, widened);
    __ psllw(product, 8);
    __ psrlw(product, 8);
    // ... so the unsigned-saturating pack passes the low bytes through as-is.
    __ packuswb(product, product);
  %}
  ins_pipe(pipe_slow);
%}
20287 
// Non-AVX MulVB for vectors wider than 8 bytes. Bytes are multiplied in two
// passes over 16-bit lanes (odd-index bytes, then even-index bytes) and the
// two partial results are OR-ed together. $dst and $xtmp are TEMPs.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister odd_part  = $dst$$XMMRegister;   // ends up holding the combined result
    XMMRegister scratch   = $xtmp$$XMMRegister;
    XMMRegister a         = $src1$$XMMRegister;
    XMMRegister b         = $src2$$XMMRegister;

    // Odd-index bytes: bring each word's high byte down, multiply, and move
    // the product's low byte back up into the high-byte position.
    __ movdqu(odd_part, a);
    __ psrlw(odd_part, 8);
    __ movdqu(scratch, b);
    __ psrlw(scratch, 8);
    __ pmullw(odd_part, scratch);
    __ psllw(odd_part, 8);

    // Even-index bytes: multiply whole words and keep only the low byte of
    // each word (shift left then right by 8 clears the high byte).
    __ movdqu(scratch, a);
    __ pmullw(scratch, b);
    __ psllw(scratch, 8);
    __ psrlw(scratch, 8);

    // Merge the odd and even halves.
    __ por(odd_part, scratch);
  %}
  ins_pipe(pipe_slow);
%}
20312 
// AVX MulVB for vectors wider than 8 bytes. Same odd/even split over 16-bit
// lanes as the non-AVX rule, using three-operand forms; $xtmp1 and $xtmp2 are
// TEMPs and $dst is written only by the final OR.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister even_part = $xtmp1$$XMMRegister;  // also scratch for the odd pass
    XMMRegister odd_part  = $xtmp2$$XMMRegister;
    XMMRegister a         = $src1$$XMMRegister;
    XMMRegister b         = $src2$$XMMRegister;

    // Odd-index bytes: high byte of each word shifted down, multiplied, and
    // the product's low byte shifted back to the high-byte position.
    __ vpsrlw(odd_part, a, 8, vec_len);
    __ vpsrlw(even_part, b, 8, vec_len);
    __ vpmullw(odd_part, even_part, odd_part, vec_len);
    __ vpsllw(odd_part, odd_part, 8, vec_len);

    // Even-index bytes: word multiply, then clear each word's high byte.
    __ vpmullw(even_part, a, b, vec_len);
    __ vpsllw(even_part, even_part, 8, vec_len);
    __ vpsrlw(even_part, even_part, 8, vec_len);

    // Merge the odd and even halves into the destination.
    __ vpor($dst$$XMMRegister, even_part, odd_part, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20334 
20335 // Shorts/Chars vector mul
// Non-AVX rule (UseAVX == 0) for MulVS: two-operand pmullw, where $dst is
// both the first factor and the result.
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw  $dst,$src\t! mul packedS" %}
  ins_encode %{
    XMMRegister accum  = $dst$$XMMRegister;  // read and overwritten
    XMMRegister factor = $src$$XMMRegister;
    __ pmullw(accum, factor);
  %}
  ins_pipe(pipe_slow);
%}
20345 
// AVX rule for MulVS with both inputs in registers: emits a three-operand
// vpmullw writing $dst from $src1 and $src2.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vpmullw(result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20356 
// AVX rule for MulVS whose second input is a LoadVector: the load is folded
// into vpmullw as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vpmullw(result, lhs, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20368 
20369 // Integers vector mul
// Non-AVX rule (UseAVX == 0) for MulVI: two-operand pmulld, where $dst is
// both the first factor and the result. The encoding asserts UseSSE > 3.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld  $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister accum  = $dst$$XMMRegister;  // read and overwritten
    XMMRegister factor = $src$$XMMRegister;
    __ pmulld(accum, factor);
  %}
  ins_pipe(pipe_slow);
%}
20380 
// AVX rule for MulVI with both inputs in registers: emits a three-operand
// vpmulld writing $dst from $src1 and $src2.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vpmulld(result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20391 
// AVX rule for MulVI whose second input is a LoadVector: the load is folded
// into vpmulld as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vpmulld(result, lhs, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20403 
20404 // Longs vector mul
// MulVL via a single evpmullq. Selected for 64-byte vectors when AVX512DQ is
// supported, or for any length when supports_avx512vldq() is true.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ evpmullq(result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20419 
// MulVL via evpmullq with the second input folded in from a LoadVector.
// Selected for 64-byte vectors when AVX512DQ is supported, or for vectors
// wider than 8 bytes when supports_avx512vldq() is true.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  ins_cost(500);
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ evpmullq(result, lhs, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20435 
// Non-AVX MulVL (UseAVX == 0), built from 32-bit multiplies:
//   result = ((lo1*hi2 + hi1*lo2) << 32) + lo1*lo2   (per 64-bit lane)
// $dst and $xtmp are TEMPs. The encoding asserts SSE4.1 support.
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  effect(TEMP dst, TEMP xtmp);
  ins_cost(500);
  format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    XMMRegister sum     = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    XMMRegister a       = $src1$$XMMRegister;
    XMMRegister b       = $src2$$XMMRegister;

    // Cross (lo*hi) products; only their low 32 bits contribute to the result.
    // Shuffle 0xB1 swaps the two 32-bit halves of every 64-bit lane.
    __ pshufd(scratch, b, 0xB1);
    __ pmulld(scratch, a);
    __ pshufd(sum, scratch, 0xB1);
    __ paddd(sum, scratch);
    __ psllq(sum, 32);

    // Low*low products, added to the shifted cross-product sum.
    __ movdqu(scratch, a);
    __ pmuludq(scratch, b);
    __ paddq(sum, scratch);
  %}
  ins_pipe(pipe_slow);
%}
20457 
// AVX MulVL for the cases not covered by the evpmullq rules: 64-byte vectors
// without AVX512DQ, or shorter vectors when supports_avx512vldq() is false.
// Built from 32-bit multiplies:
//   result = ((lo1*hi2 + hi1*lo2) << 32) + lo1*lo2   (per 64-bit lane)
// $xtmp1 and $xtmp2 are TEMPs; $dst is written only by the final add.
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister work  = $xtmp1$$XMMRegister;
    XMMRegister cross = $xtmp2$$XMMRegister;
    XMMRegister a     = $src1$$XMMRegister;
    XMMRegister b     = $src2$$XMMRegister;

    // Cross (lo*hi) products; only their low 32 bits contribute to the result.
    // Shuffle 0xB1 swaps the two 32-bit halves of every 64-bit lane.
    __ vpshufd(work, b, 0xB1, vec_len);
    __ vpmulld(work, a, work, vec_len);
    __ vpshufd(cross, work, 0xB1, vec_len);
    __ vpaddd(cross, cross, work, vec_len);
    __ vpsllq(cross, cross, 32, vec_len);

    // Low*low products, added to the shifted cross-product sum.
    __ vpmuludq(work, a, b, vec_len);
    __ vpaddq($dst$$XMMRegister, work, cross, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20482 
// AVX MulVL shortcut used when the MulVL node reports has_uint_inputs():
// a single vpmuludq suffices. Its ins_cost(100) is lower than the
// ins_cost(500) of the general MulVL rules.
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vpmuludq(result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20494 
// AVX MulVL shortcut used when the MulVL node reports has_int_inputs():
// a single vpmuldq suffices. Its ins_cost(100) is lower than the
// ins_cost(500) of the general MulVL rules.
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vpmuldq(result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20506 
20507 // Floats vector mul
// Non-AVX rule (UseAVX == 0) for MulVF: two-operand mulps, where $dst is
// both the first factor and the result.
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps   $dst,$src\t! mul packedF" %}
  ins_encode %{
    XMMRegister accum  = $dst$$XMMRegister;  // read and overwritten
    XMMRegister factor = $src$$XMMRegister;
    __ mulps(accum, factor);
  %}
  ins_pipe(pipe_slow);
%}
20517 
// AVX rule for MulVF with both inputs in registers: emits a three-operand
// vmulps writing $dst from $src1 and $src2.
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vmulps(result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20528 
// AVX rule for MulVF whose second input is a LoadVector: the load is folded
// into vmulps as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vmulps(result, lhs, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20540 
20541 // Doubles vector mul
// Non-AVX rule (UseAVX == 0) for MulVD: two-operand mulpd, where $dst is
// both the first factor and the result.
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd   $dst,$src\t! mul packedD" %}
  ins_encode %{
    XMMRegister accum  = $dst$$XMMRegister;  // read and overwritten
    XMMRegister factor = $src$$XMMRegister;
    __ mulpd(accum, factor);
  %}
  ins_pipe(pipe_slow);
%}
20551 
// AVX rule for MulVD with both inputs in registers: emits a three-operand
// vmulpd writing $dst from $src1 and $src2.
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vmulpd(result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20562 
// AVX rule for MulVD whose second input is a LoadVector: the load is folded
// into vmulpd as a memory operand. Only applies when the vector of n->in(1)
// is wider than 8 bytes.
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vmulpd(result, lhs, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20574 
20575 // --------------------------------- DIV --------------------------------------
20576 
20577 // Floats vector div
// Non-AVX rule (UseAVX == 0) for DivVF: two-operand divps, where $dst is
// both the dividend and the result.
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps   $dst,$src\t! div packedF" %}
  ins_encode %{
    XMMRegister accum   = $dst$$XMMRegister;  // read and overwritten
    XMMRegister divisor = $src$$XMMRegister;
    __ divps(accum, divisor);
  %}
  ins_pipe(pipe_slow);
%}
20587 
// AVX rule for DivVF with both inputs in registers: emits a three-operand
// vdivps computing $src1 / $src2 into $dst.
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    XMMRegister divisor  = $src2$$XMMRegister;
    __ vdivps(result, dividend, divisor, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20598 
// AVX rule for DivVF whose divisor is a LoadVector: the load is folded into
// vdivps as a memory operand. Only applies when the vector of n->in(1) is
// wider than 8 bytes.
instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivps(result, dividend, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20610 
20611 // Doubles vector div
// Non-AVX rule (UseAVX == 0) for DivVD: two-operand divpd, where $dst is
// both the dividend and the result.
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd   $dst,$src\t! div packedD" %}
  ins_encode %{
    XMMRegister accum   = $dst$$XMMRegister;  // read and overwritten
    XMMRegister divisor = $src$$XMMRegister;
    __ divpd(accum, divisor);
  %}
  ins_pipe(pipe_slow);
%}
20621 
// AVX rule for DivVD with both inputs in registers: emits a three-operand
// vdivpd computing $src1 / $src2 into $dst.
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    XMMRegister divisor  = $src2$$XMMRegister;
    __ vdivpd(result, dividend, divisor, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20632 
// AVX rule for DivVD whose divisor is a LoadVector: the load is folded into
// vdivpd as a memory operand. Only applies when the vector of n->in(1) is
// wider than 8 bytes.
instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    const int vec_len = vector_length_encoding(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivpd(result, dividend, $mem$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20644 
20645 // ------------------------------ MinMax ---------------------------------------
20646 
20647 // Byte, Short, Int vector Min/Max
// Non-AVX MinV/MaxV for integral element types other than T_LONG
// (T_BYTE, T_SHORT, T_INT). Two-operand form: $dst is both an input and the
// result. The ideal opcode and element type are forwarded to pminmax.
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax  $dst,$src\t!  " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    const int ideal_op = this->ideal_Opcode();  // MinV or MaxV per the match rules
    XMMRegister accum   = $dst$$XMMRegister;
    XMMRegister operand = $src$$XMMRegister;
    __ pminmax(ideal_op, lane_type, accum, operand);
  %}
  ins_pipe(pipe_slow);
%}
20663 
// AVX MinV/MaxV for integral element types other than T_LONG
// (T_BYTE, T_SHORT, T_INT). The ideal opcode, element type and length
// encoding are forwarded to vpminmax.
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG &&
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
  ins_encode %{
    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    const int vec_len  = vector_length_encoding(this);
    const int ideal_op = this->ideal_Opcode();  // MinV or MaxV per the match rules
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vpminmax(ideal_op, lane_type, result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20679 
20680 // Long vector Min/Max
// Non-AVX MinV/MaxV for 16-byte T_LONG vectors. $tmp is declared with the
// rxmm0 operand class and, like $dst, is marked TEMP. Note the two match
// rules list their inputs in different orders (MinV dst src / MaxV src dst).
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");
    const int ideal_op = this->ideal_Opcode();  // MinV or MaxV per the match rules

    XMMRegister accum   = $dst$$XMMRegister;
    XMMRegister operand = $src$$XMMRegister;
    XMMRegister scratch = $tmp$$XMMRegister;
    __ pminmax(ideal_op, elem_bt, accum, operand, scratch);
  %}
  ins_pipe(pipe_slow);
%}
20699 
// AVX MinV/MaxV for T_LONG vectors of at most 32 bytes when AVX512VL is not
// supported. Operands use the legVec class and $dst is marked TEMP.
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");
    const int ideal_op = this->ideal_Opcode();  // MinV or MaxV per the match rules
    const int vec_len  = vector_length_encoding(this);

    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vpminmax(ideal_op, elem_bt, result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20717 
// MinV/MaxV for T_LONG vectors when the vector is 64 bytes long or AVX512VL
// is supported. The ideal opcode, element type and length encoding are
// forwarded to vpminmax; the encoding asserts UseAVX > 2.
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  // Fixed: the second source was written as "src2" without the '$', so the
  // operand was printed as literal text instead of being substituted.
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();  // MinV or MaxV per the match rules
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20736 
20737 // Float/Double vector Min/Max
// MinV/MaxV for floating-point element types (T_FLOAT, T_DOUBLE) when
// VM_Version::supports_avx10_2() is true. No temporaries are declared; the
// work is delegated to vminmax_fp_avx10_2 with k0 as its mask-register
// argument.
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP  $dst, $a, $b" %}
  ins_encode %{
    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    const int ideal_op = this->ideal_Opcode();  // MinV or MaxV per the match rules
    const int vec_len  = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $a$$XMMRegister;
    XMMRegister rhs    = $b$$XMMRegister;
    __ vminmax_fp_avx10_2(ideal_op, lane_type, result, k0, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20752 
20753 // Float/Double vector Min/Max
// AVX MinV/MaxV for floating-point element types (T_FLOAT, T_DOUBLE) on
// vectors of at most 32 bytes, used when AVX10.2 is not supported. All
// operands use the legVec class; $tmp, $atmp and $btmp are TEMPs handed to
// vminmax_fp.
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) &&
            UseAVX > 0);
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    const int ideal_op = this->ideal_Opcode();  // MinV or MaxV per the match rules
    const int vec_len  = vector_length_encoding(this);

    XMMRegister result    = $dst$$XMMRegister;
    XMMRegister lhs       = $a$$XMMRegister;
    XMMRegister rhs       = $b$$XMMRegister;
    XMMRegister scratch   = $tmp$$XMMRegister;
    XMMRegister lhs_temp  = $atmp$$XMMRegister;
    XMMRegister rhs_temp  = $btmp$$XMMRegister;
    __ vminmax_fp(ideal_op, lane_type, result, lhs, rhs,
                  scratch, lhs_temp, rhs_temp, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20775 
// MinV/MaxV for floating-point element types (T_FLOAT, T_DOUBLE) on 64-byte
// vectors, used when AVX10.2 is not supported. $dst, $atmp, $btmp and the
// mask register $ktmp are all TEMPs; the work is delegated to evminmax_fp.
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  // The format now also lists $ktmp so the printed instruction names every
  // temporary declared in the effect() list above.
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();  // MinV or MaxV per the match rules
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20796 
20797 // ------------------------------ Unsigned vector Min/Max ----------------------
20798 
// UMinV/UMaxV with both inputs in registers. Applies when AVX512VL is
// supported or the element type is not T_LONG. The ideal opcode, element
// type and length encoding are forwarded to vpuminmax.
instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    const int ideal_op = this->ideal_Opcode();  // UMinV or UMaxV per the match rules
    const int vec_len  = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $a$$XMMRegister;
    XMMRegister rhs    = $b$$XMMRegister;
    __ vpuminmax(ideal_op, elem_bt, result, lhs, rhs, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20813 
// UMinV/UMaxV whose second input is a LoadVector, folded in as a memory
// operand. Applies when AVX512VL is supported or the element type is not
// T_LONG.
instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    const int ideal_op = this->ideal_Opcode();  // UMinV or UMaxV per the match rules
    const int vec_len  = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $a$$XMMRegister;
    __ vpuminmax(ideal_op, elem_bt, result, lhs, $b$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
20828 
// UMinV/UMaxV for T_LONG elements when AVX512VL is not supported. The work
// is delegated to vpuminmaxq, which receives $xtmp1 and $xtmp2 (both TEMP)
// as scratch registers.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  // Fixed: the temporaries were written without '$', so the format printed
  // the literal words "xtmp1"/"xtmp2" rather than the allocated registers.
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();  // UMinV or UMaxV per the match rules
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20842 
20843 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
        // Masked unsigned vector min/max, register form. dst is both the first
        // input and the result; mask is an opmask (k) register.
20844   match(Set dst (UMinV (Binary dst src2) mask));
20845   match(Set dst (UMaxV (Binary dst src2) mask));
20846   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20847   ins_encode %{
20848     int opcode = this->ideal_Opcode();
20849     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20850     int vec_enc = vector_length_encoding(this);
          // NOTE(review): the literal 'true' is presumably the merge flag of
          // evmasked_op (unselected lanes keep dst) -- confirm against the helper.
20851     __ evmasked_op(opcode, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
20852                    $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
20853   %}
20854   ins_pipe( pipe_slow );
20855 %}
20856 
20857 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
        // Masked unsigned vector min/max with the second input loaded from
        // memory (LoadVector). dst is both the first input and the result.
20858   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20859   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20860   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20861   ins_encode %{
20862     int opcode = this->ideal_Opcode();
20863     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20864     int vec_enc = vector_length_encoding(this);
          // NOTE(review): the literal 'true' is presumably the merge flag of
          // evmasked_op (unselected lanes keep dst) -- confirm against the helper.
20865     __ evmasked_op(opcode, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
20866                    $dst$$XMMRegister, $src2$$Address, true, vec_enc);
20867   %}
20868   ins_pipe( pipe_slow );
20869 %}
20870 
20871 // --------------------------------- Signum/CopySign ---------------------------
20872 
20873 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
        // Scalar float signum. dst is both input and result; zero and one are
        // register inputs supplied by the matched (Binary zero one) subtree.
        // The condition flags are declared killed.
20874   match(Set dst (SignumF dst (Binary zero one)));
20875   effect(KILL cr);
20876   format %{ "signumF $dst, $dst" %}
20877   ins_encode %{
          // The ideal opcode is forwarded so the shared helper can tell the
          // float form from the double form.
20879     __ signum_fp(this->ideal_Opcode(), $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20880   %}
20881   ins_pipe( pipe_slow );
20882 %}
20883 
20884 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
        // Scalar double signum. dst is both input and result; zero and one are
        // register inputs supplied by the matched (Binary zero one) subtree.
        // The condition flags are declared killed.
20885   match(Set dst (SignumD dst (Binary zero one)));
20886   effect(KILL cr);
20887   format %{ "signumD $dst, $dst" %}
20888   ins_encode %{
          // The ideal opcode is forwarded so the shared helper can tell the
          // double form from the float form.
20890     __ signum_fp(this->ideal_Opcode(), $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20891   %}
20892   ins_pipe( pipe_slow );
20893 %}
20894 
20895 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
        // Vector signum (float and double) for vectors of at most 32 bytes
        // when AVX512VL is not available. Uses one vector temporary.
20896   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20897   match(Set dst (SignumVF src (Binary zero one)));
20898   match(Set dst (SignumVD src (Binary zero one)));
20899   effect(TEMP dst, TEMP xtmp1);
20900   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20901   ins_encode %{
20902     int vlen_enc = vector_length_encoding(this);
20903     int opc = this->ideal_Opcode();
20904     __ vector_signum_avx(opc, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20905                          $xtmp1$$XMMRegister, vlen_enc);
20906   %}
20907   ins_pipe( pipe_slow );
20908 %}
20909 
20910 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
        // Vector signum (float and double) when AVX512VL is available or the
        // vector is 64 bytes long. Uses an opmask register as temporary.
20911   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20912   match(Set dst (SignumVF src (Binary zero one)));
20913   match(Set dst (SignumVD src (Binary zero one)));
20914   effect(TEMP dst, TEMP ktmp1);
20915   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20916   ins_encode %{
20917     int vlen_enc = vector_length_encoding(this);
20918     int opc = this->ideal_Opcode();
20919     __ vector_signum_evex(opc, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20920                           $ktmp1$$KRegister, vlen_enc);
20921   %}
20922   ins_pipe( pipe_slow );
20923 %}
20924 
20925 // ---------------------------------------
20926 // For copySign use 0xE4 as the imm8 truth table for vpternlog
20927 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20928 // C (xmm2) is set to 0x7FFFFFFF
20929 // Wherever xmm2 is 0, we want to pick from B (sign)
20930 // Wherever xmm2 is 1, we want to pick from A (src)
20931 //
20932 // A B C Result
20933 // 0 0 0 0
20934 // 0 0 1 0
20935 // 0 1 0 1
20936 // 0 1 1 0
20937 // 1 0 0 0
20938 // 1 0 1 1
20939 // 1 1 0 1
20940 // 1 1 1 1
20941 //
20942 // Result going from high bit to low bit is 0b11100100 = 0xE4
20943 // ---------------------------------------
20944 
20945 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
        // Scalar float copySign. dst is both the first input and the result;
        // tmp2 (general register) and tmp1 (XMM register) are scratch registers
        // used to materialize the 0x7FFFFFFF selection mask.
20946   match(Set dst (CopySignF dst src));
20947   effect(TEMP tmp1, TEMP tmp2);
20948   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20949   ins_encode %{
          // 0x7FFFFFFF has every bit set except bit 31 (the float sign bit);
          // move it from the general register into the XMM temporary.
20950     __ movl($tmp2$$Register, 0x7FFFFFFF);
20951     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
          // Ternary logic with imm8 0xE4 (see the truth table comment that
          // precedes this instruct): where the mask bit is 1 keep the dst bit,
          // where it is 0 take the src bit.
20952     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20953   %}
20954   ins_pipe( pipe_slow );
20955 %}
20956 
20957 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
        // Scalar double copySign. dst is both the first input and the result.
        // The immD operand 'zero' is part of the matched (Binary src zero)
        // subtree but is not referenced by the encoding below.
20958   match(Set dst (CopySignD dst (Binary src zero)));
20959   ins_cost(100);
20960   effect(TEMP tmp1, TEMP tmp2);
20961   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20962   ins_encode %{
          // 0x7FFFFFFFFFFFFFFF has every bit set except bit 63 (the double
          // sign bit); move it from the general register into the XMM temporary.
20963     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20964     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
          // Ternary logic with imm8 0xE4 (see the truth table comment earlier
          // in this section): where the mask bit is 1 keep the dst bit,
          // where it is 0 take the src bit.
20965     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20966   %}
20967   ins_pipe( pipe_slow );
20968 %}
20969 
20970 //----------------------------- CompressBits/ExpandBits ------------------------
20971 
20972 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
        // 32-bit CompressBits with the mask in a register, emitted as a single
        // pextl (parallel bit extract). Selected only when the node's bottom
        // type is an int type.
20973   predicate(n->bottom_type()->isa_int());
20974   match(Set dst (CompressBits src mask));
20975   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20976   ins_encode %{
20977     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20978   %}
20979   ins_pipe( pipe_slow );
20980 %}
20981 
20982 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
        // 32-bit ExpandBits with the mask in a register, emitted as a single
        // pdepl (parallel bit deposit). Selected only when the node's bottom
        // type is an int type.
20983   predicate(n->bottom_type()->isa_int());
20984   match(Set dst (ExpandBits src mask));
20985   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20986   ins_encode %{
20987     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20988   %}
20989   ins_pipe( pipe_slow );
20990 %}
20991 
20992 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
        // 32-bit CompressBits with the mask folded in as a memory operand
        // (LoadI), emitted as a single pextl. Selected only when the node's
        // bottom type is an int type.
20993   predicate(n->bottom_type()->isa_int());
20994   match(Set dst (CompressBits src (LoadI mask)));
20995   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20996   ins_encode %{
20997     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20998   %}
20999   ins_pipe( pipe_slow );
21000 %}
21001 
21002 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
        // 32-bit ExpandBits with the mask folded in as a memory operand
        // (LoadI), emitted as a single pdepl. Selected only when the node's
        // bottom type is an int type.
21003   predicate(n->bottom_type()->isa_int());
21004   match(Set dst (ExpandBits src (LoadI mask)));
21005   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21006   ins_encode %{
21007     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21008   %}
21009   ins_pipe( pipe_slow );
21010 %}
21011 
21012 // --------------------------------- Sqrt --------------------------------------
21013 
21014 instruct vsqrtF_reg(vec dst, vec src) %{
        // Packed single-precision square root, register source.
21015   match(Set dst (SqrtVF src));
21016   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21017   ins_encode %{
          // Only the AVX (VEX/EVEX) form is emitted here.
21018     assert(UseAVX > 0, "required");
21019     int vec_enc = vector_length_encoding(this);
21020     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
21021   %}
21022   ins_pipe( pipe_slow );
21023 %}
21024 
21025 instruct vsqrtF_mem(vec dst, memory mem) %{
        // Packed single-precision square root with the input folded in as a
        // memory operand. Only used when the loaded vector is wider than 8 bytes.
21026   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21027   match(Set dst (SqrtVF (LoadVector mem)));
21028   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21029   ins_encode %{
          // Only the AVX (VEX/EVEX) form is emitted here.
21030     assert(UseAVX > 0, "required");
21031     int vec_enc = vector_length_encoding(this);
21032     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vec_enc);
21033   %}
21034   ins_pipe( pipe_slow );
21035 %}
21036 
21037 // Floating point vector sqrt
21038 instruct vsqrtD_reg(vec dst, vec src) %{
        // Packed double-precision square root, register source.
21039   match(Set dst (SqrtVD src));
21040   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21041   ins_encode %{
          // Only the AVX (VEX/EVEX) form is emitted here.
21042     assert(UseAVX > 0, "required");
21043     int vec_enc = vector_length_encoding(this);
21044     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
21045   %}
21046   ins_pipe( pipe_slow );
21047 %}
21048 
21049 instruct vsqrtD_mem(vec dst, memory mem) %{
        // Packed double-precision square root with the input folded in as a
        // memory operand. Only used when the loaded vector is wider than 8 bytes.
21050   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21051   match(Set dst (SqrtVD (LoadVector mem)));
21052   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21053   ins_encode %{
          // Only the AVX (VEX/EVEX) form is emitted here.
21054     assert(UseAVX > 0, "required");
21055     int vec_enc = vector_length_encoding(this);
21056     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vec_enc);
21057   %}
21058   ins_pipe( pipe_slow );
21059 %}
21060 
21061 // ------------------------------ Shift ---------------------------------------
21062 
21063 // Left and right shift count vectors are the same on x86
21064 // (only lowest bits of xmm reg are used for count).
21065 instruct vshiftcnt(vec dst, rRegI cnt) %{
        // Materializes a vector shift count: copies the 32-bit count from a
        // general register into the low dword of an XMM register. One rule
        // serves both LShiftCntV and RShiftCntV.
21066   match(Set dst (LShiftCntV cnt));
21067   match(Set dst (RShiftCntV cnt));
21068   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21069   ins_encode %{
21070     __ movdl($dst$$XMMRegister, $cnt$$Register);
21071   %}
21072   ins_pipe( pipe_slow );
21073 %}
21074 
21075 // Byte vector shift
21076 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
        // Byte vector shift by a uniform (non-variable) count for vectors of at
        // most 8 elements. The bytes are widened to words in tmp, shifted as
        // words, masked, and packed back to bytes in dst.
21077   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21078   match(Set dst ( LShiftVB src shift));
21079   match(Set dst ( RShiftVB src shift));
21080   match(Set dst (URShiftVB src shift));
21081   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21082   format %{"vector_byte_shift $dst,$src,$shift" %}
21083   ins_encode %{
21084     assert(UseSSE > 3, "required");
21085     int opcode = this->ideal_Opcode();
          // Every opcode except URShiftVB requests the signed extension.
21086     bool sign = (opcode != Op_URShiftVB);
          // Widen bytes to words, then shift the words in place.
21087     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21088     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
          // Mask each shifted word with the short-to-byte mask (presumably
          // keeping only the low byte, so the saturating pack below cannot
          // clamp the value), then pack the words back to bytes.
21089     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21090     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21091     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21092   %}
21093   ins_pipe( pipe_slow );
21094 %}
21095 
21096 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
        // 16-element byte vector shift by a uniform count when UseAVX <= 1.
        // The low and high 8 bytes are widened to words separately (tmp1 and
        // tmp2), shifted, masked, and packed together into dst.
21097   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21098             UseAVX <= 1);
21099   match(Set dst ( LShiftVB src shift));
21100   match(Set dst ( RShiftVB src shift));
21101   match(Set dst (URShiftVB src shift));
21102   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21103   format %{"vector_byte_shift $dst,$src,$shift" %}
21104   ins_encode %{
21105     assert(UseSSE > 3, "required");
21106     int opcode = this->ideal_Opcode();
          // Every opcode except URShiftVB requests the signed extension.
21107     bool sign = (opcode != Op_URShiftVB);
          // Low 8 bytes: widen to words in tmp1 and shift.
21108     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21109     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
          // High 8 bytes: pshufd 0xE brings the upper 64 bits of src down to
          // the low half of tmp2, which is then widened and shifted.
21110     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21111     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21112     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
          // Apply the short-to-byte mask to both halves (dst holds the mask,
          // then the masked low half) and pack: low half into the low 8 bytes,
          // high half into the high 8 bytes.
21113     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21114     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21115     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21116     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21117   %}
21118   ins_pipe( pipe_slow );
21119 %}
21120 
21121 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
        // 16-element byte vector shift by a uniform count when UseAVX > 1.
        // All 16 bytes are widened to 16 words in one 256-bit register,
        // shifted, masked, and packed back to 16 bytes.
21122   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21123             UseAVX > 1);
21124   match(Set dst ( LShiftVB src shift));
21125   match(Set dst ( RShiftVB src shift));
21126   match(Set dst (URShiftVB src shift));
21127   effect(TEMP dst, TEMP tmp);
21128   format %{"vector_byte_shift $dst,$src,$shift" %}
21129   ins_encode %{
21130     int opcode = this->ideal_Opcode();
          // Every opcode except URShiftVB requests the signed extension.
21131     bool sign = (opcode != Op_URShiftVB);
          // The widened intermediate is 256 bits wide regardless of the
          // 128-bit result size.
21132     int vlen_enc = Assembler::AVX_256bit;
21133     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21134     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21135     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
          // Pull the upper 128 bits into dst, then pack the low (tmp) and high
          // (dst) word halves into 16 bytes; the trailing 0 is the 128-bit
          // vector length encoding.
21136     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21137     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21138   %}
21139   ins_pipe( pipe_slow );
21140 %}
21141 
21142 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
        // 32-element byte vector shift by a uniform count (asserts UseAVX > 1).
        // The upper and lower 16 bytes are widened to words in tmp and dst
        // respectively, shifted, masked, packed, and then reordered.
21143   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21144   match(Set dst ( LShiftVB src shift));
21145   match(Set dst ( RShiftVB src shift));
21146   match(Set dst (URShiftVB src shift));
21147   effect(TEMP dst, TEMP tmp);
21148   format %{"vector_byte_shift $dst,$src,$shift" %}
21149   ins_encode %{
21150     assert(UseAVX > 1, "required");
21151     int opcode = this->ideal_Opcode();
          // Every opcode except URShiftVB requests the signed extension.
21152     bool sign = (opcode != Op_URShiftVB);
21153     int vlen_enc = Assembler::AVX_256bit;
          // tmp <- widened upper 16 bytes, dst <- widened lower 16 bytes.
21154     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21155     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21156     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21157     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21158     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21159     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21160     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
          // The 256-bit pack works within each 128-bit lane, so the 64-bit
          // chunks come out interleaved; vpermq 0xD8 (order 0,2,1,3) restores
          // element order.
21161     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21162     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21163   %}
21164   ins_pipe( pipe_slow );
21165 %}
21166 
21167 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
        // 64-element byte vector shift by a uniform count (asserts UseAVX > 2).
        // The upper and lower 32 bytes are widened to words in tmp1 and tmp2,
        // shifted, masked, packed into dst, and finally permuted.
21168   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21169   match(Set dst ( LShiftVB src shift));
21170   match(Set dst  (RShiftVB src shift));
21171   match(Set dst (URShiftVB src shift));
21172   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21173   format %{"vector_byte_shift $dst,$src,$shift" %}
21174   ins_encode %{
21175     assert(UseAVX > 2, "required");
21176     int opcode = this->ideal_Opcode();
          // Every opcode except URShiftVB requests the signed extension.
21177     bool sign = (opcode != Op_URShiftVB);
21178     int vlen_enc = Assembler::AVX_512bit;
          // tmp1 <- widened upper 256 bits of src, tmp2 <- widened lower 256 bits.
21179     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21180     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21181     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21182     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21183     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
          // Load the short-to-byte mask and broadcast its low dword across all
          // 512 bits of dst, then mask both halves.
21184     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21185     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21186     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21187     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
          // Pack words to bytes, then permute quadwords using the indices
          // loaded from vector_byte_perm_mask (presumably to undo the
          // per-lane interleaving of the pack -- confirm against the stub table).
21188     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21189     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21190     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21191   %}
21192   ins_pipe( pipe_slow );
21193 %}
21194 
21195 // A logical right shift of a short vector produces an incorrect Java result
21196 // for negative data, because Java code converts a short value to int with
21197 // sign extension before the shift. Char vectors are fine since chars are
21198 // unsigned values.
21199 // Shorts/Chars vector shift (left, arithmetic right, and logical right)
21200 instruct vshiftS(vec dst, vec src, vec shift) %{
        // Short/char vector shift by a uniform (non-variable) count. Covers
        // left, arithmetic right, and logical right shifts.
21201   predicate(!n->as_ShiftV()->is_var_shift());
21202   match(Set dst ( LShiftVS src shift));
21203   match(Set dst ( RShiftVS src shift));
21204   match(Set dst (URShiftVS src shift));
21205   effect(TEMP dst, USE src, USE shift);
21206   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21207   ins_encode %{
21208     int opc = this->ideal_Opcode();
21209     if (UseAVX > 0) {
            // AVX: non-destructive three-operand form.
21210       int vec_enc = vector_length_encoding(this);
21211       __ vshiftw(opc, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
21212     } else {
            // SSE: copy src into dst with a move sized to the vector, then
            // shift dst in place.
21213       int num_elems = Matcher::vector_length(this);
21214       if (num_elems == 2) {
21215         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21216       } else if (num_elems == 4) {
21217         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21218       } else {
21219         assert (num_elems == 8, "sanity");
21220         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21221       }
21222       __ vshiftw(opc, $dst$$XMMRegister, $shift$$XMMRegister);
21223     }
21224   %}
21225   ins_pipe( pipe_slow );
21226 %}
21229 
21230 // Integers vector shift (left, arithmetic right, and logical right)
21231 instruct vshiftI(vec dst, vec src, vec shift) %{
        // Int vector shift by a uniform (non-variable) count held in a vector
        // register. Covers left, arithmetic right, and logical right shifts.
21232   predicate(!n->as_ShiftV()->is_var_shift());
21233   match(Set dst ( LShiftVI src shift));
21234   match(Set dst ( RShiftVI src shift));
21235   match(Set dst (URShiftVI src shift));
21236   effect(TEMP dst, USE src, USE shift);
21237   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21238   ins_encode %{
21239     int opc = this->ideal_Opcode();
21240     if (UseAVX > 0) {
            // AVX: non-destructive three-operand form.
21241       int vec_enc = vector_length_encoding(this);
21242       __ vshiftd(opc, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
21243     } else {
            // SSE: copy src into dst with a move sized to the vector, then
            // shift dst in place.
21244       int num_elems = Matcher::vector_length(this);
21245       if (num_elems == 2) {
21246         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21247       } else {
21248         assert(num_elems == 4, "sanity");
21249         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21250       }
21251       __ vshiftd(opc, $dst$$XMMRegister, $shift$$XMMRegister);
21252     }
21253   %}
21254   ins_pipe( pipe_slow );
21255 %}
21257 
21258 // Integers vector constant shift (left, arithmetic right, and logical right)
21259 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
        // Int vector shift by an 8-bit immediate count; matches the shift node
        // together with its LShiftCntV/RShiftCntV count input.
21260   match(Set dst (LShiftVI src (LShiftCntV shift)));
21261   match(Set dst (RShiftVI src (RShiftCntV shift)));
21262   match(Set dst (URShiftVI src (RShiftCntV shift)));
21263   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21264   ins_encode %{
21265     int opc = this->ideal_Opcode();
21266     if (UseAVX > 0) {
            // AVX: non-destructive form with an immediate count.
21267       int vec_enc = vector_length_encoding(this);
21268       __ vshiftd_imm(opc, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vec_enc);
21269     } else {
            // SSE: copy src into dst with a move sized to the vector, then
            // shift dst in place.
21270       int num_elems = Matcher::vector_length(this);
21271       if (num_elems == 2) {
21272         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21273       } else {
21274         assert(num_elems == 4, "sanity");
21275         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21276       }
21277       __ vshiftd_imm(opc, $dst$$XMMRegister, $shift$$constant);
21278     }
21279   %}
21280   ins_pipe( pipe_slow );
21281 %}
21283 
21284 // Longs vector shift
21285 instruct vshiftL(vec dst, vec src, vec shift) %{
        // Long vector left and logical right shift by a uniform (non-variable)
        // count held in a vector register. RShiftVL is not matched here.
21286   predicate(!n->as_ShiftV()->is_var_shift());
21287   match(Set dst ( LShiftVL src shift));
21288   match(Set dst (URShiftVL src shift));
21289   effect(TEMP dst, USE src, USE shift);
21290   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21291   ins_encode %{
21292     int opc = this->ideal_Opcode();
21293     if (UseAVX > 0) {
            // AVX: non-destructive three-operand form.
21294       int vec_enc = vector_length_encoding(this);
21295       __ vshiftq(opc, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
21296     } else {
            // SSE: only the two-element vector is expected; copy, then shift
            // dst in place.
21297       assert(Matcher::vector_length(this) == 2, "");
21298       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21299       __ vshiftq(opc, $dst$$XMMRegister, $shift$$XMMRegister);
21300     }
21301   %}
21302   ins_pipe( pipe_slow );
21303 %}
21304 
21305 // Longs vector constant shift
21306 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
        // Long vector left and logical right shift by an 8-bit immediate count;
        // matches the shift node together with its count input.
21307   match(Set dst (LShiftVL src (LShiftCntV shift)));
21308   match(Set dst (URShiftVL src (RShiftCntV shift)));
21309   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21310   ins_encode %{
21311     int opc = this->ideal_Opcode();
21312     if (UseAVX > 0) {
            // AVX: non-destructive form with an immediate count.
21313       int vec_enc = vector_length_encoding(this);
21314       __ vshiftq_imm(opc, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vec_enc);
21315     } else {
            // SSE: only the two-element vector is expected; copy, then shift
            // dst in place.
21316       assert(Matcher::vector_length(this) == 2, "");
21317       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21318       __ vshiftq_imm(opc, $dst$$XMMRegister, $shift$$constant);
21319     }
21320   %}
21321   ins_pipe( pipe_slow );
21322 %}
21323 
21324 // -------------------ArithmeticRightShift -----------------------------------
21325 // Long vector arithmetic right shift
21326 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
        // Long vector arithmetic right shift by a uniform count when
        // UseAVX <= 2. The shift is emulated: with m = sign_mask >>> s, the
        // result is ((x >>> s) ^ m) - m.
        // NOTE(review): assumes vector_long_sign_mask() holds only the sign bit
        // in each 64-bit lane -- confirm against the stub constant.
21327   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21328   match(Set dst (RShiftVL src shift));
21329   effect(TEMP dst, TEMP tmp);
21330   format %{ "vshiftq $dst,$src,$shift" %}
21331   ins_encode %{
21332     uint vlen = Matcher::vector_length(this);
21333     if (vlen == 2) {
            // 128-bit path using destructive SSE forms.
            // dst <- src >>> shift
21334       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21335       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
            // tmp <- sign_mask >>> shift
21336       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21337       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
            // dst <- (dst ^ tmp) - tmp
21338       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21339       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21340     } else {
            // 256-bit path: same computation with three-operand AVX2 forms.
21341       assert(vlen == 4, "sanity");
21342       assert(UseAVX > 1, "required");
21343       int vlen_enc = Assembler::AVX_256bit;
21344       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21345       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21346       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21347       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21348       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21349     }
21350   %}
21351   ins_pipe( pipe_slow );
21352 %}
21353 
21354 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
        // Long vector arithmetic right shift by a uniform count when
        // UseAVX > 2, emitted directly as evpsraq with no temporaries.
21355   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21356   match(Set dst (RShiftVL src shift));
21357   format %{ "vshiftq $dst,$src,$shift" %}
21358   ins_encode %{
21359     int vec_enc = vector_length_encoding(this);
21360     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
21361   %}
21362   ins_pipe( pipe_slow );
21363 %}
21364 
21365 // ------------------- Variable Shift -----------------------------
21366 // Byte variable shift
21367 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
        // Variable (per-element) byte shift for vectors of at most 8 elements
        // when AVX512BW is not available. Asserts UseAVX >= 2.
21368   predicate(Matcher::vector_length(n) <= 8 &&
21369             n->as_ShiftV()->is_var_shift() &&
21370             !VM_Version::supports_avx512bw());
21371   match(Set dst ( LShiftVB src shift));
21372   match(Set dst ( RShiftVB src shift));
21373   match(Set dst (URShiftVB src shift));
21374   effect(TEMP dst, TEMP vtmp);
21375   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21376   ins_encode %{
21377     assert(UseAVX >= 2, "required");
21378 
21379     int opcode = this->ideal_Opcode();
21380     int vlen_enc = Assembler::AVX_128bit;
          // varshiftbw leaves word-sized results in dst (per the comments in the
          // sibling 16-byte rule); pack them down to bytes with the 128-bit
          // encoding (0).
21381     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21382     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21383   %}
21384   ins_pipe( pipe_slow );
21385 %}
21386 
21387 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
        // Variable (per-element) byte shift for 16-element vectors when
        // AVX512BW is not available. Asserts UseAVX >= 2. The low and high
        // 8 bytes are shifted separately and merged by a pack.
21388   predicate(Matcher::vector_length(n) == 16 &&
21389             n->as_ShiftV()->is_var_shift() &&
21390             !VM_Version::supports_avx512bw());
21391   match(Set dst ( LShiftVB src shift));
21392   match(Set dst ( RShiftVB src shift));
21393   match(Set dst (URShiftVB src shift));
21394   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21395   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21396   ins_encode %{
21397     assert(UseAVX >= 2, "required");
21398 
21399     int opcode = this->ideal_Opcode();
21400     int vlen_enc = Assembler::AVX_128bit;
21401     // Shift lower half and get word result in dst
21402     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21403 
21404     // Shift upper half and get word result in vtmp1
          // (vpshufd 0xE moves the upper 64 bits of src and of shift into the
          // low half of vtmp1 and vtmp2; vtmp2 then serves as both the shift
          // input and the scratch register of varshiftbw.)
21405     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21406     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21407     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21408 
21409     // Merge and down convert the two word results to byte in dst
21410     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21411   %}
21412   ins_pipe( pipe_slow );
21413 %}
21414 
21415 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
        // Variable (per-element) byte shift for 32-element vectors when
        // AVX512BW is not available. Asserts UseAVX >= 2. Each 128-bit half is
        // processed as two 8-byte groups, and the two 128-bit results are
        // merged with vinserti128.
21416   predicate(Matcher::vector_length(n) == 32 &&
21417             n->as_ShiftV()->is_var_shift() &&
21418             !VM_Version::supports_avx512bw());
21419   match(Set dst ( LShiftVB src shift));
21420   match(Set dst ( RShiftVB src shift));
21421   match(Set dst (URShiftVB src shift));
21422   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
        // The TEMP list is introduced with '!' like the other
        // vector_varshift_byte formats.
21423   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21424   ins_encode %{
21425     assert(UseAVX >= 2, "required");
21426 
21427     int opcode = this->ideal_Opcode();
21428     int vlen_enc = Assembler::AVX_128bit;
21429     // Process lower 128 bits and get result in dst
21430     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21431     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21432     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21433     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21434     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21435 
21436     // Process higher 128 bits and get result in vtmp3
21437     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21438     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21439     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21440     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21441     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21442     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21443     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21444 
21445     // Merge the two results in dst
21446     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21447   %}
21448   ins_pipe( pipe_slow );
21449 %}
21450 
21451 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
        // Variable (per-element) byte shift for vectors of at most 32 elements
        // when AVX512BW is available; delegated to the evarshiftb helper.
21452   predicate(Matcher::vector_length(n) <= 32 &&
21453             n->as_ShiftV()->is_var_shift() &&
21454             VM_Version::supports_avx512bw());
21455   match(Set dst ( LShiftVB src shift));
21456   match(Set dst ( RShiftVB src shift));
21457   match(Set dst (URShiftVB src shift));
21458   effect(TEMP dst, TEMP vtmp);
21459   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21460   ins_encode %{
21461     assert(UseAVX > 2, "required");
21462 
21463     int vec_enc = vector_length_encoding(this);
21464     int opc = this->ideal_Opcode();
21465     __ evarshiftb(opc, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc, $vtmp$$XMMRegister);
21466   %}
21467   ins_pipe( pipe_slow );
21468 %}
21469 
21470 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
        // Variable (per-element) byte shift for 64-element vectors when
        // AVX512BW is available. The vector is processed as two 256-bit halves
        // that are recombined with vinserti64x4.
21471   predicate(Matcher::vector_length(n) == 64 &&
21472             n->as_ShiftV()->is_var_shift() &&
21473             VM_Version::supports_avx512bw());
21474   match(Set dst ( LShiftVB src shift));
21475   match(Set dst ( RShiftVB src shift));
21476   match(Set dst (URShiftVB src shift));
21477   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21478   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21479   ins_encode %{
21480     assert(UseAVX > 2, "required");
21481 
21482     int opcode = this->ideal_Opcode();
          // Each evarshiftb call below is issued with the 256-bit encoding.
21483     int vlen_enc = Assembler::AVX_256bit;
          // Lower 256 bits: result in dst, vtmp1 as scratch.
21484     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
          // Upper 256 bits: extract the high halves of src and shift, shift
          // them with vtmp2 doubling as scratch, result in vtmp1.
21485     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21486     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21487     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
          // Insert the upper result into the high 256 bits of dst.
21488     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21489   %}
21490   ins_pipe( pipe_slow );
21491 %}
21492 
21493 // Short variable shift
21494 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
        // Variable (per-element) short shift for vectors of at most 8 elements
        // when AVX512BW is not available. Shorts are widened to ints, shifted
        // per element, masked, and packed back to shorts.
21495   predicate(Matcher::vector_length(n) <= 8 &&
21496             n->as_ShiftV()->is_var_shift() &&
21497             !VM_Version::supports_avx512bw());
21498   match(Set dst ( LShiftVS src shift));
21499   match(Set dst ( RShiftVS src shift));
21500   match(Set dst (URShiftVS src shift));
21501   effect(TEMP dst, TEMP vtmp);
21502   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21503   ins_encode %{
21504     assert(UseAVX >= 2, "required");
21505 
21506     int opc = this->ideal_Opcode();
          // Every opcode except URShiftVS requests the signed extension.
21507     bool is_signed = (opc != Op_URShiftVS);
          // The widened intermediate is 256 bits wide.
21508     int vlen_enc = Assembler::AVX_256bit;
          // Widen the data per 'is_signed' and zero-extend the shift counts.
21509     __ vextendwd(is_signed, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21510     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21511     __ varshiftd(opc, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21512     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
          // Pack the low and high 128-bit halves of the int results into
          // 8 shorts using the 128-bit encoding.
21513     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21514     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, Assembler::AVX_128bit);
21515   %}
21516   ins_pipe( pipe_slow );
21517 %}
21518 
21519 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
        // Per-lane (variable) shift of a 16-lane short vector, selected only when
        // AVX512BW is unavailable. Each half of the input is widened to ints,
        // shifted, masked, and the two halves are packed back into 16 shorts.
21520   predicate(Matcher::vector_length(n) == 16 &&
21521             n->as_ShiftV()->is_var_shift() &&
21522             !VM_Version::supports_avx512bw());
21523   match(Set dst ( LShiftVS src shift));
21524   match(Set dst ( RShiftVS src shift));
21525   match(Set dst (URShiftVS src shift));
21526   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
        // The rule covers left, arithmetic-right and logical-right shifts, so the
        // printed name is direction-neutral; the TEMPs are listed as in the byte rules.
21527   format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21528   ins_encode %{
21529     assert(UseAVX >= 2, "required");
21530 
21531     int opcode = this->ideal_Opcode();
          // 'sign' is true for every matched opcode except the unsigned right shift.
21532     bool sign = (opcode != Op_URShiftVS);
21533     int vlen_enc = Assembler::AVX_256bit;
21534     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21535     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21536     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21537     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21538     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21539 
21540     // Shift upper half, with result in dst using vtmp1 as TEMP
21541     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21542     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21543     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21544     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21545     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21546     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21547 
21548     // Merge lower and upper half result into dst
21549     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
          // The 256-bit pack works per 128-bit lane; 0xD8 reorders the quadwords
          // as 0,2,1,3 to restore element order.
21550     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21551   %}
21552   ins_pipe( pipe_slow );
21553 %}
21554 
21555 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
        // Per-lane (variable) shift of a short vector when AVX512BW is available.
        // The predicate places no restriction on the vector length.
21556   predicate(n->as_ShiftV()->is_var_shift() &&
21557             VM_Version::supports_avx512bw());
21558   match(Set dst ( LShiftVS src shift));
21559   match(Set dst ( RShiftVS src shift));
21560   match(Set dst (URShiftVS src shift));
21561   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21562   ins_encode %{
21563     assert(UseAVX > 2, "required");
21564 
21565     int shift_op = this->ideal_Opcode();
21566     int enc = vector_length_encoding(this);
          // Without AVX512VL the instruction is always emitted at 512 bits.
21567     if (!VM_Version::supports_avx512vl()) {
21568       enc = Assembler::AVX_512bit;
21569     }
21570     __ varshiftw(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc);
21571   %}
21572   ins_pipe( pipe_slow );
21573 %}
21574 
21575 // Integer variable shift
21576 instruct vshiftI_var(vec dst, vec src, vec shift) %{
        // Per-lane (variable) left, arithmetic-right and logical-right shift of an
        // int vector; the ideal opcode selects the operation inside varshiftd.
21577   predicate(n->as_ShiftV()->is_var_shift());
21578   match(Set dst ( LShiftVI src shift));
21579   match(Set dst ( RShiftVI src shift));
21580   match(Set dst (URShiftVI src shift));
21581   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21582   ins_encode %{
21583     assert(UseAVX >= 2, "required");
21584 
21585     const int shift_op = this->ideal_Opcode();
21586     const int enc = vector_length_encoding(this);
21587     __ varshiftd(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc);
21588   %}
21589   ins_pipe( pipe_slow );
21590 %}
21591 
21592 // Long variable shift
21593 instruct vshiftL_var(vec dst, vec src, vec shift) %{
        // Per-lane (variable) left and logical-right shift of a long vector.
        // The arithmetic right shift (RShiftVL) is not matched by this rule.
21594   predicate(n->as_ShiftV()->is_var_shift());
21595   match(Set dst ( LShiftVL src shift));
21596   match(Set dst (URShiftVL src shift));
21597   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21598   ins_encode %{
21599     assert(UseAVX >= 2, "required");
21600 
21601     const int shift_op = this->ideal_Opcode();
21602     const int enc = vector_length_encoding(this);
21603     __ varshiftq(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc);
21604   %}
21605   ins_pipe( pipe_slow );
21606 %}
21607 
21608 // Long variable right shift arithmetic
21609 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
        // Per-lane arithmetic right shift of a long vector with at most 4 lanes
        // when UseAVX == 2. The temp-taking varshiftq overload is used here.
21610   predicate(Matcher::vector_length(n) <= 4 &&
21611             n->as_ShiftV()->is_var_shift() &&
21612             UseAVX == 2);
21613   match(Set dst (RShiftVL src shift));
21614   effect(TEMP dst, TEMP vtmp);
21615   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21616   ins_encode %{
21617     const int shift_op = this->ideal_Opcode();
21618     const int enc = vector_length_encoding(this);
21619     __ varshiftq(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister,
21620                  enc, $vtmp$$XMMRegister);
21621   %}
21622   ins_pipe( pipe_slow );
21623 %}
21624 
21625 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
        // Per-lane arithmetic right shift of a long vector when UseAVX > 2;
        // unlike the UseAVX == 2 rule, no temporary register is needed.
21626   predicate(n->as_ShiftV()->is_var_shift() &&
21627             UseAVX > 2);
21628   match(Set dst (RShiftVL src shift));
        // Name spelled as in the other long variable-shift rules (was "varfshift").
21629   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21630   ins_encode %{
21631     int opcode = this->ideal_Opcode();
21632     int vlen_enc = vector_length_encoding(this);
21633     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21634   %}
21635   ins_pipe( pipe_slow );
21636 %}
21637 
21638 // --------------------------------- AND --------------------------------------
21639 
21640 instruct vand(vec dst, vec src) %{
        // Vector AND for UseAVX == 0. The match rule uses dst as both the first
        // input and the result, which fits the two-operand pand emitted below.
21641   predicate(UseAVX == 0);
21642   match(Set dst (AndV dst src));
21643   format %{ "pand    $dst,$src\t! and vectors" %}
21644   ins_encode %{
21645     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21646   %}
21647   ins_pipe( pipe_slow );
21648 %}
21649 
21650 instruct vand_reg(vec dst, vec src1, vec src2) %{
        // Vector AND for UseAVX > 0, register operands: dst = src1 & src2.
21651   predicate(UseAVX > 0);
21652   match(Set dst (AndV src1 src2));
21653   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21654   ins_encode %{
          // Encoding width follows this node's vector length.
21655     const int enc = vector_length_encoding(this);
21656     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
21657   %}
21658   ins_pipe( pipe_slow );
21659 %}
21660 
21661 instruct vand_mem(vec dst, vec src, memory mem) %{
        // Vector AND for UseAVX > 0 with the second input folded in as a memory
        // operand (LoadVector). Applies only when the first input is wider than 8 bytes.
21662   predicate(UseAVX > 0 &&
21663             Matcher::vector_length_in_bytes(n->in(1)) > 8);
21664   match(Set dst (AndV src (LoadVector mem)));
21665   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21666   ins_encode %{
21667     const int enc = vector_length_encoding(this);
21668     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
21669   %}
21670   ins_pipe( pipe_slow );
21671 %}
21672 
21673 // --------------------------------- OR ---------------------------------------
21674 
21675 instruct vor(vec dst, vec src) %{
        // Vector OR for UseAVX == 0. The match rule uses dst as both the first
        // input and the result, which fits the two-operand por emitted below.
21676   predicate(UseAVX == 0);
21677   match(Set dst (OrV dst src));
21678   format %{ "por     $dst,$src\t! or vectors" %}
21679   ins_encode %{
21680     __ por($dst$$XMMRegister, $src$$XMMRegister);
21681   %}
21682   ins_pipe( pipe_slow );
21683 %}
21684 
21685 instruct vor_reg(vec dst, vec src1, vec src2) %{
        // Vector OR for UseAVX > 0, register operands: dst = src1 | src2.
21686   predicate(UseAVX > 0);
21687   match(Set dst (OrV src1 src2));
21688   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21689   ins_encode %{
          // Encoding width follows this node's vector length.
21690     const int enc = vector_length_encoding(this);
21691     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
21692   %}
21693   ins_pipe( pipe_slow );
21694 %}
21695 
21696 instruct vor_mem(vec dst, vec src, memory mem) %{
        // Vector OR for UseAVX > 0 with the second input folded in as a memory
        // operand (LoadVector). Applies only when the first input is wider than 8 bytes.
21697   predicate(UseAVX > 0 &&
21698             Matcher::vector_length_in_bytes(n->in(1)) > 8);
21699   match(Set dst (OrV src (LoadVector mem)));
21700   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21701   ins_encode %{
21702     const int enc = vector_length_encoding(this);
21703     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
21704   %}
21705   ins_pipe( pipe_slow );
21706 %}
21707 
21708 // --------------------------------- XOR --------------------------------------
21709 
21710 instruct vxor(vec dst, vec src) %{
        // Vector XOR for UseAVX == 0. The match rule uses dst as both the first
        // input and the result, which fits the two-operand pxor emitted below.
21711   predicate(UseAVX == 0);
21712   match(Set dst (XorV dst src));
21713   format %{ "pxor    $dst,$src\t! xor vectors" %}
21714   ins_encode %{
21715     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21716   %}
21717   ins_pipe( pipe_slow );
21718 %}
21719 
21720 instruct vxor_reg(vec dst, vec src1, vec src2) %{
        // Vector XOR for UseAVX > 0, register operands: dst = src1 ^ src2.
21721   predicate(UseAVX > 0);
21722   match(Set dst (XorV src1 src2));
21723   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21724   ins_encode %{
          // Encoding width follows this node's vector length.
21725     const int enc = vector_length_encoding(this);
21726     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
21727   %}
21728   ins_pipe( pipe_slow );
21729 %}
21730 
21731 instruct vxor_mem(vec dst, vec src, memory mem) %{
        // Vector XOR for UseAVX > 0 with the second input folded in as a memory
        // operand (LoadVector). Applies only when the first input is wider than 8 bytes.
21732   predicate(UseAVX > 0 &&
21733             Matcher::vector_length_in_bytes(n->in(1)) > 8);
21734   match(Set dst (XorV src (LoadVector mem)));
21735   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21736   ins_encode %{
21737     const int enc = vector_length_encoding(this);
21738     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
21739   %}
21740   ins_pipe( pipe_slow );
21741 %}
21742 
21743 // --------------------------------- VectorCast --------------------------------------
21744 
21745 instruct vcastBtoX(vec dst, vec src) %{
        // Byte-vector cast to any element type. The byte-to-double case without
        // AVX512VL is excluded here and handled by vcastBtoD (legVec operands).
21746   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21747   match(Set dst (VectorCastB2X src));
21748   format %{ "vector_cast_b2x $dst,$src\t!" %}
21749   ins_encode %{
21750     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
21751     const int enc = vector_length_encoding(this);
21752     __ vconvert_b2x(dst_bt, $dst$$XMMRegister, $src$$XMMRegister, enc);
21753   %}
21754   ins_pipe( pipe_slow );
21755 %}
21756 
21757 instruct vcastBtoD(legVec dst, legVec src) %{
        // Byte-to-double vector cast when AVX512VL is not supported; the
        // complement of vcastBtoX's predicate. Operands use the legVec class.
21758   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21759   match(Set dst (VectorCastB2X src));
21760   format %{ "vector_cast_b2x $dst,$src\t!" %}
21761   ins_encode %{
21762     const int enc = vector_length_encoding(this);
21763     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, enc);
21764   %}
21765   ins_pipe( pipe_slow );
21766 %}
21767 
21768 instruct castStoX(vec dst, vec src) %{
        // Short-to-byte vector cast for sources of at most 8 lanes, used when
        // UseAVX <= 2 or AVX512VL+BW is not available.
21769   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21770             Matcher::vector_length(n->in(1)) <= 8 && // src
21771             Matcher::vector_element_basic_type(n) == T_BYTE);
21772   match(Set dst (VectorCastS2X src));
21773   format %{ "vector_cast_s2x $dst,$src" %}
21774   ins_encode %{
21775     assert(UseAVX > 0, "required");
21776 
          // Mask each short with vector_short_to_byte_mask(), then pack the shorts
          // to bytes. Both instructions use vector-length encoding 0.
          // NOTE(review): 0 is presumably Assembler::AVX_128bit - confirm.
21777     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21778     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21779   %}
21780   ins_pipe( pipe_slow );
21781 %}
21782 
21783 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
        // Short-to-byte vector cast for a 16-lane source, used when UseAVX <= 2
        // or AVX512VL+BW is not available.
21784   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21785             Matcher::vector_length(n->in(1)) == 16 && // src
21786             Matcher::vector_element_basic_type(n) == T_BYTE);
21787   effect(TEMP dst, TEMP vtmp);
21788   match(Set dst (VectorCastS2X src));
21789   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21790   ins_encode %{
21791     assert(UseAVX > 0, "required");
21792 
          // The mask is applied at the width of the source vector.
21793     const int src_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21794     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), src_enc, noreg);
          // Pack the low half (dst) and the extracted high half (vtmp) into bytes.
21795     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21796     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21797   %}
21798   ins_pipe( pipe_slow );
21799 %}
21800 
21801 instruct vcastStoX_evex(vec dst, vec src) %{
        // Short-vector cast: every target type when AVX512VL+BW is available
        // with UseAVX > 2, otherwise only casts whose result is at least as
        // wide as the source.
21802   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21803             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21804   match(Set dst (VectorCastS2X src));
21805   format %{ "vector_cast_s2x $dst,$src\t!" %}
21806   ins_encode %{
21807     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21808     int src_vlen_enc = vector_length_encoding(this, $src);
21809     int vlen_enc = vector_length_encoding(this);
21810     switch (to_elem_bt) {
21811       case T_BYTE:
              // The narrowing move is encoded at the source width. Without AVX512VL
              // the override must go to the encoding that is actually used; it was
              // previously stored into vlen_enc, which this case never reads. This
              // matches the pattern in vcastItoX_evex.
21812         if (!VM_Version::supports_avx512vl()) {
21813           src_vlen_enc = Assembler::AVX_512bit;
21814         }
21815         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21816         break;
21817       case T_INT:
21818         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21819         break;
21820       case T_FLOAT:
              // Sign-extend to int lanes, then convert int -> float in place.
21821         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21822         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21823         break;
21824       case T_LONG:
21825         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21826         break;
21827       case T_DOUBLE: {
              // The intermediate int vector uses a narrower encoding than the double
              // result: 256-bit for a 512-bit result, 128-bit otherwise.
21828         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21829         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21830         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21831         break;
21832       }
21833       default:
21834         ShouldNotReachHere();
21835     }
21836   %}
21837   ins_pipe( pipe_slow );
21838 %}
21839 
21840 instruct castItoX(vec dst, vec src) %{
        // Narrowing int-vector cast (to byte or short) for sources of at most
        // 16 bytes when UseAVX <= 2.
21841   predicate(UseAVX <= 2 &&
21842             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21843             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21844   match(Set dst (VectorCastI2X src));
21845   format %{ "vector_cast_i2x $dst,$src" %}
21846   ins_encode %{
21847     assert(UseAVX > 0, "required");
21848 
21849     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
21850     const int src_enc = vector_length_encoding(this, $src);
21851 
          // Byte and short targets share the mask + int->short pack; byte needs one
          // more short->byte pack.
21852     const bool to_byte = (dst_bt == T_BYTE);
21853     assert(to_byte || dst_bt == T_SHORT, "%s", type2name(dst_bt));
21854     AddressLiteral lane_mask = to_byte ? ExternalAddress(vector_int_to_byte_mask())
21855                                        : ExternalAddress(vector_int_to_short_mask());
21856     __ vpand($dst$$XMMRegister, $src$$XMMRegister, lane_mask, src_enc, noreg);
21857     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, src_enc);
21858     if (to_byte) {
21859       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, src_enc);
21860     }
21861   %}
21862   ins_pipe( pipe_slow );
21863 %}
21864 
21865 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
        // Narrowing int-vector cast (to byte or short) for a 32-byte source
        // when UseAVX <= 2.
21866   predicate(UseAVX <= 2 &&
21867             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21868             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21869   match(Set dst (VectorCastI2X src));
21870   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21871   effect(TEMP dst, TEMP vtmp);
21872   ins_encode %{
21873     assert(UseAVX > 0, "required");
21874 
21875     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
21876     const int src_enc = vector_length_encoding(this, $src);
21877 
          // Byte and short targets share mask, high-half extract and int->short
          // pack; byte needs one more 128-bit short->byte pack.
21878     const bool to_byte = (dst_bt == T_BYTE);
21879     assert(to_byte || dst_bt == T_SHORT, "%s", type2name(dst_bt));
21880     AddressLiteral lane_mask = to_byte ? ExternalAddress(vector_int_to_byte_mask())
21881                                        : ExternalAddress(vector_int_to_short_mask());
21882     __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, lane_mask, src_enc, noreg);
21883     __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21884     __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, src_enc);
21885     if (to_byte) {
21886       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21887     }
21889   %}
21890   ins_pipe( pipe_slow );
21891 %}
21892 
21893 instruct vcastItoX_evex(vec dst, vec src) %{
        // Int-vector cast: every target type when UseAVX > 2, otherwise only
        // casts whose result is at least as wide as the source.
21894   predicate(UseAVX > 2 ||
21895             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21896   match(Set dst (VectorCastI2X src));
21897   format %{ "vector_cast_i2x $dst,$src\t!" %}
21898   ins_encode %{
21899     assert(UseAVX > 0, "required");
21900 
21901     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
21902     const int dst_enc = vector_length_encoding(this);
          // Encoding for the narrowing moves (byte/short targets): the source
          // width, or 512 bits when AVX512VL is missing.
21903     int narrow_enc = vector_length_encoding(this, $src);
21904     if (!VM_Version::supports_avx512vl()) {
21905       narrow_enc = Assembler::AVX_512bit;
21906     }
21907     switch (dst_bt) {
21908       case T_BYTE:
21909         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, narrow_enc);
21910         break;
21911       case T_SHORT:
21915         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, narrow_enc);
21916         break;
21917       case T_FLOAT:
21918         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_enc);
21919         break;
21920       case T_LONG:
21921         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_enc);
21922         break;
21923       case T_DOUBLE:
21924         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_enc);
21925         break;
21926       default:
21927         ShouldNotReachHere();
21928     }
21929   %}
21930   ins_pipe( pipe_slow );
21931 %}
21932 
21933 instruct vcastLtoBS(vec dst, vec src) %{
        // Long-vector cast to byte or short when UseAVX <= 2.
21934   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21935             UseAVX <= 2);
21936   match(Set dst (VectorCastL2X src));
21937   format %{ "vector_cast_l2x  $dst,$src" %}
21938   ins_encode %{
21939     assert(UseAVX > 0, "required");
21940 
21941     const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
21942     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
21943     AddressLiteral lane_mask = (dst_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21944                                                   : ExternalAddress(vector_int_to_short_mask());
          // Step 1: shuffle the source into dst; the sequence depends on source size.
21945     if (src_bytes <= 16) {
21946       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21949     } else {
21950       assert(src_bytes <= 32, "required");
21951       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21952       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21953     }
          // Step 2 (both sizes): mask the int lanes and pack them to shorts at 128 bits.
21954     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, lane_mask, Assembler::AVX_128bit, noreg);
21955     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
          // Step 3: a byte target needs one more short->byte pack.
21956     if (dst_bt == T_BYTE) {
21957       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21958     }
21959   %}
21960   ins_pipe( pipe_slow );
21961 %}
21962 
21963 instruct vcastLtoX_evex(vec dst, vec src) %{
        // Long-vector cast: every target type when UseAVX > 2, otherwise only the
        // int, float and double targets. Byte/short targets with UseAVX <= 2 are
        // matched by vcastLtoBS.
21964   predicate(UseAVX > 2 ||
21965             (Matcher::vector_element_basic_type(n) == T_INT ||
21966              Matcher::vector_element_basic_type(n) == T_FLOAT ||
21967              Matcher::vector_element_basic_type(n) == T_DOUBLE));
21968   match(Set dst (VectorCastL2X src));
21969   format %{ "vector_cast_l2x  $dst,$src\t!" %}
21970   ins_encode %{
21971     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // vlen is the source size in bytes; vlen_enc is the source's encoding.
21972     int vlen = Matcher::vector_length_in_bytes(this, $src);
21973     int vlen_enc = vector_length_encoding(this, $src);
21974     switch (to_elem_bt) {
21975       case T_BYTE:
              // Without AVX512VL the narrowing move is emitted at 512 bits.
21976         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21977           vlen_enc = Assembler::AVX_512bit;
21978         }
21979         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21980         break;
21981       case T_SHORT:
21982         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21983           vlen_enc = Assembler::AVX_512bit;
21984         }
21985         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21986         break;
21987       case T_INT:
              // Dispatch on the source size in bytes.
21988         if (vlen == 8) {
                // 8-byte source: a register copy, skipped when dst and src coincide.
21989           if ($dst$$XMMRegister != $src$$XMMRegister) {
21990             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21991           }
21992         } else if (vlen == 16) {
21993           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21994         } else if (vlen == 32) {
21995           if (UseAVX > 2) {
21996             if (!VM_Version::supports_avx512vl()) {
21997               vlen_enc = Assembler::AVX_512bit;
21998             }
21999             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22000           } else {
                  // UseAVX <= 2 fallback: two-step shuffle, the same pair vcastLtoBS
                  // uses for its 32-byte source.
22001             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22002             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22003           }
22004         } else { // vlen == 64
22005           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22006         }
22007         break;
22008       case T_FLOAT:
              // NOTE(review): the predicate admits T_FLOAT/T_DOUBLE for any UseAVX;
              // this assert expects the AVX512DQ requirement to be enforced elsewhere.
22009         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22010         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22011         break;
22012       case T_DOUBLE:
22013         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22014         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22015         break;
22016 
            // NOTE(review): other cast rules in this section use ShouldNotReachHere()
            // in the default case; this one uses assert(false, ...).
22017       default: assert(false, "%s", type2name(to_elem_bt));
22018     }
22019   %}
22020   ins_pipe( pipe_slow );
22021 %}
22022 
22023 instruct vcastFtoD_reg(vec dst, vec src) %{
        // Float-to-double vector cast, encoded at the width of the double result.
22024   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22025   match(Set dst (VectorCastF2X src));
22026   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22027   ins_encode %{
22028     const int dst_enc = vector_length_encoding(this);
22029     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, dst_enc);
22030   %}
22031   ins_pipe( pipe_slow );
22032 %}
22033 
22034 
22035 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
        // Float-vector cast to an integral type of at most 4 bytes, for sources
        // below 64 bytes when neither AVX10.2 nor AVX512VL is available.
22036   predicate(!VM_Version::supports_avx10_2() &&
22037             !VM_Version::supports_avx512vl() &&
22038             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22039             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22040             is_integral_type(Matcher::vector_element_basic_type(n)));
22041   match(Set dst (VectorCastF2X src));
22042   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22043   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22044   ins_encode %{
22045     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
22046     const int src_enc = vector_length_encoding(this, $src);
22047     // Why 'noreg' is passed as the scratch register: since JDK-8292878 no explicit
22048     // scratch register is needed to load addresses wider than 32 bits for register
22049     // indirect addressing, because stub constants live in the code cache and
22050     // ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise
22051     // that limit, but a code cache larger than 2G looks unreasonable in practice.
22052     // On the upside, with this cap we save a temporary register allocation, which
22053     // in the limiting case can prevent spilling in high register pressure blocks.
22054     __ vector_castF2X_avx(dst_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22055                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22056                           ExternalAddress(vector_float_signflip()), noreg, src_enc);
22057   %}
22058   ins_pipe( pipe_slow );
22059 %}
22060 
22061 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
        // Float-vector cast to an integral type without AVX10.2, when AVX512VL is
        // available or the source is 64 bytes wide. Uses two mask-register temps.
22062   predicate(!VM_Version::supports_avx10_2() &&
22063             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22064             is_integral_type(Matcher::vector_element_basic_type(n)));
22065   match(Set dst (VectorCastF2X src));
22066   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22067   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22068   ins_encode %{
22069     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
22070     if (dst_bt != T_LONG) {
            // Non-long targets: encode at the source width, float sign-flip constant.
22071       const int src_enc = vector_length_encoding(this, $src);
22072       __ vector_castF2X_evex(dst_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22073                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22074                              ExternalAddress(vector_float_signflip()), noreg, src_enc);
22075     } else {
            // Long target: encode at the result width, double sign-flip constant.
22076       const int dst_enc = vector_length_encoding(this);
22077       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22078                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22079                              ExternalAddress(vector_double_signflip()), noreg, dst_enc);
22080     }
22081   %}
22082   ins_pipe( pipe_slow );
22083 %}
22084 
22085 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
        // Float-vector cast to an integral type on AVX10.2, register source.
        // No temporaries and no flags kill are declared for this form.
22086   predicate(VM_Version::supports_avx10_2() &&
22087             is_integral_type(Matcher::vector_element_basic_type(n)));
22088   match(Set dst (VectorCastF2X src));
22089   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22090   ins_encode %{
22091     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
          // A long result is encoded at the result width, all others at the source width.
22092     const int enc = (dst_bt != T_LONG) ? vector_length_encoding(this, $src) : vector_length_encoding(this);
22093     __ vector_castF2X_avx10_2(dst_bt, $dst$$XMMRegister, $src$$XMMRegister, enc);
22094   %}
22095   ins_pipe( pipe_slow );
22096 %}
22097 
22098 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
        // Float-vector cast to an integral type on AVX10.2 with the source
        // vector load folded in as a memory operand.
22099   predicate(VM_Version::supports_avx10_2() &&
22100             is_integral_type(Matcher::vector_element_basic_type(n)));
22101   match(Set dst (VectorCastF2X (LoadVector src)));
22102   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22103   ins_encode %{
22104     const int lanes = Matcher::vector_length(this);
22105     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
          // The source is a memory operand, so its byte size is derived as
          // lane count * sizeof(jfloat). A long result uses the result width.
22106     const int enc = (dst_bt != T_LONG) ? vector_length_encoding(lanes * sizeof(jfloat)) : vector_length_encoding(this);
22107     __ vector_castF2X_avx10_2(dst_bt, $dst$$XMMRegister, $src$$Address, enc);
22108   %}
22109   ins_pipe( pipe_slow );
22110 %}
22111 
22112 instruct vcastDtoF_reg(vec dst, vec src) %{
        // Double-to-float vector cast, encoded at the width of the double source.
22113   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22114   match(Set dst (VectorCastD2X src));
22115   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22116   ins_encode %{
22117     const int src_enc = vector_length_encoding(this, $src);
22118     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, src_enc);
22119   %}
22120   ins_pipe( pipe_slow );
22121 %}
22122 
22123 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
        // Double-vector cast to an integral type, for sources below 64 bytes
        // when neither AVX10.2 nor AVX512VL is available. Uses five vector temps.
22124   predicate(!VM_Version::supports_avx10_2() &&
22125             !VM_Version::supports_avx512vl() &&
22126             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22127             is_integral_type(Matcher::vector_element_basic_type(n)));
22128   match(Set dst (VectorCastD2X src));
22129   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22130   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22131   ins_encode %{
22132     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
22133     const int src_enc = vector_length_encoding(this, $src);
          // The float sign-flip constant is the one passed on this path.
22134     __ vector_castD2X_avx(dst_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22135                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22136                           ExternalAddress(vector_float_signflip()), noreg, src_enc);
22137   %}
22138   ins_pipe( pipe_slow );
22139 %}
22140 
22141 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
        // Double-vector cast to an integral type without AVX10.2, when AVX512VL is
        // available or the source is 64 bytes wide. Uses two mask-register temps.
22142   predicate(!VM_Version::supports_avx10_2() &&
22143             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22144             is_integral_type(Matcher::vector_element_basic_type(n)));
22145   match(Set dst (VectorCastD2X src));
22146   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22147   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22148   ins_encode %{
22149     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
22150     const int src_enc = vector_length_encoding(this, $src);
          // The sign-flip constant depends on AVX512DQ: double flavour with it,
          // float flavour without.
22151     AddressLiteral flip_const = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22152                                 ExternalAddress(vector_float_signflip());
22153     __ vector_castD2X_evex(dst_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22154                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, flip_const, noreg, src_enc);
22155   %}
22156   ins_pipe( pipe_slow );
22157 %}
22158 
22159 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
        // Double-vector cast to an integral type on AVX10.2, register source.
        // No temporaries and no flags kill are declared for this form.
22160   predicate(VM_Version::supports_avx10_2() &&
22161             is_integral_type(Matcher::vector_element_basic_type(n)));
22162   match(Set dst (VectorCastD2X src));
22163   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22164   ins_encode %{
22165     const int src_enc = vector_length_encoding(this, $src);
22166     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
22167     __ vector_castD2X_avx10_2(dst_bt, $dst$$XMMRegister, $src$$XMMRegister, src_enc);
22168   %}
22169   ins_pipe( pipe_slow );
22170 %}
22171 
22172 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
        // Double-vector cast to an integral type on AVX10.2 with the source
        // vector load folded in as a memory operand.
22173   predicate(VM_Version::supports_avx10_2() &&
22174             is_integral_type(Matcher::vector_element_basic_type(n)));
22175   match(Set dst (VectorCastD2X (LoadVector src)));
22176   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22177   ins_encode %{
          // The source is a memory operand, so its byte size is derived as
          // lane count * sizeof(jdouble).
22178     const int lanes = Matcher::vector_length(this);
22179     const int src_enc = vector_length_encoding(lanes * sizeof(jdouble));
22180     const BasicType dst_bt = Matcher::vector_element_basic_type(this);
22181     __ vector_castD2X_avx10_2(dst_bt, $dst$$XMMRegister, $src$$Address, src_enc);
22182   %}
22183   ins_pipe( pipe_slow );
22184 %}
22185 
// Unsigned vector casts from byte, short or int elements (VectorUCastB2X,
// VectorUCastS2X, VectorUCastI2X). The source and destination element types
// are read from the node and handed to the macro assembler.
22186 instruct vucast(vec dst, vec src) %{
22187   match(Set dst (VectorUCastB2X src));
22188   match(Set dst (VectorUCastS2X src));
22189   match(Set dst (VectorUCastI2X src));
22190   format %{ "vector_ucast $dst,$src\t!" %}
22191   ins_encode %{
22192     assert(UseAVX > 0, "required");
22193 
22194     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22195     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // The length encoding is that of the result vector, not of the source.
22196     int vlen_enc = vector_length_encoding(this);
22197     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22198   %}
22199   ins_pipe( pipe_slow );
22200 %}
22201 
// RoundVF producing T_INT lanes, for vectors shorter than 64 bytes on CPUs
// without AVX512VL. Needs one general-purpose and four vector temporaries and
// kills the flags register.
22202 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22203   predicate(!VM_Version::supports_avx512vl() &&
22204             Matcher::vector_length_in_bytes(n) < 64 &&
22205             Matcher::vector_element_basic_type(n) == T_INT);
22206   match(Set dst (RoundVF src));
22207   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22208   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22209   ins_encode %{
22210     int vlen_enc = vector_length_encoding(this);
          // Constant-table entry passed on as the new MXCSR value; the value
          // differs when EnableX86ECoreOpts is set.
          // NOTE(review): the meaning of the individual bits is not visible
          // here - confirm against vector_round_float_avx.
22211     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22212     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22213                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22214                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22215   %}
22216   ins_pipe( pipe_slow );
22217 %}
22218 
// RoundVF producing T_INT lanes, for CPUs with AVX512VL or for 64-byte
// vectors. Uses two vector and two opmask temporaries plus a general-purpose
// temporary, and kills the flags register.
22219 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22220   predicate((VM_Version::supports_avx512vl() ||
22221              Matcher::vector_length_in_bytes(n) == 64) &&
22222              Matcher::vector_element_basic_type(n) == T_INT);
22223   match(Set dst (RoundVF src));
22224   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22225   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22226   ins_encode %{
22227     int vlen_enc = vector_length_encoding(this);
          // Constant-table entry passed on as the new MXCSR value; the value
          // differs when EnableX86ECoreOpts is set.
          // NOTE(review): bit-level meaning not visible here - confirm against
          // vector_round_float_evex.
22228     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22229     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22230                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22231                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22232   %}
22233   ins_pipe( pipe_slow );
22234 %}
22235 
// RoundVD producing T_LONG lanes. Uses the double sign-flip stub constant,
// two vector and two opmask temporaries plus a general-purpose temporary, and
// kills the flags register.
22236 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22237   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22238   match(Set dst (RoundVD src));
22239   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22240   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22241   ins_encode %{
22242     int vlen_enc = vector_length_encoding(this);
          // Constant-table entry passed on as the new MXCSR value; the value
          // differs when EnableX86ECoreOpts is set.
          // NOTE(review): bit-level meaning not visible here - confirm against
          // vector_round_double_evex.
22243     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22244     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22245                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22246                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22247   %}
22248   ins_pipe( pipe_slow );
22249 %}
22250 
22251 // --------------------------------- VectorMaskCmp --------------------------------------
22252 
// Floating-point VectorMaskCmp whose result is a vector (not a predicate
// mask type), for src1 sizes from 8 to 32 bytes. Emits vcmpps for T_FLOAT
// lanes and vcmppd otherwise.
22253 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22254   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22255             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22256             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22257             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22258   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22259   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22260   ins_encode %{
22261     int vlen_enc = vector_length_encoding(this, $src1);
          // Translate the BoolTest condition constant into the FP comparison
          // predicate expected by the assembler.
22262     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22263     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22264       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22265     } else {
22266       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22267     }
22268   %}
22269   ins_pipe( pipe_slow );
22270 %}
22271 
// Floating-point VectorMaskCmp on 64-byte sources whose result is a vector
// (not a predicate mask type). The compare is done into the opmask temporary
// $ktmp, which is then turned into a vector by a masked move from the
// vector_all_bits_set() constant.
22272 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22273   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22274             n->bottom_type()->isa_pvectmask() == nullptr &&
22275             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22276   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22277   effect(TEMP ktmp);
22278   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22279   ins_encode %{
22280     int vlen_enc = Assembler::AVX_512bit;
22281     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22282     KRegister mask = k0; // The comparison itself is not being masked.
          // Float lanes use the 32-bit compare/move pair, double lanes the
          // 64-bit pair; the move is issued with its merge argument false.
22283     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22284       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22285       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22286     } else {
22287       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22288       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22289     }
22290   %}
22291   ins_pipe( pipe_slow );
22292 %}
22293 
// Floating-point VectorMaskCmp whose result type is a predicate mask: the
// compare writes straight into the opmask register $dst.
22294 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22295   predicate(n->bottom_type()->isa_pvectmask() &&
22296             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22297   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22298   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22299   ins_encode %{
22300     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22301     int vlen_enc = vector_length_encoding(this, $src1);
22302     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22303     KRegister mask = k0; // The comparison itself is not being masked.
          // evcmpps for T_FLOAT lanes, evcmppd for everything else (T_DOUBLE
          // per the predicate).
22304     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22305       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22306     } else {
22307       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22308     }
22309   %}
22310   ins_pipe( pipe_slow );
22311 %}
22312 
// Signed integral VectorMaskCmp with a vector result, for src1 sizes from 4
// to 32 bytes and the conditions eq, lt and gt. No vector temporary is
// needed: xnoreg is passed in the temporary slot of vpcmpCCW.
22313 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22314   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22315             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22316             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22317             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22318             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22319             (n->in(2)->get_int() == BoolTest::eq ||
22320              n->in(2)->get_int() == BoolTest::lt ||
22321              n->in(2)->get_int() == BoolTest::gt)); // cond
22322   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22323   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22324   ins_encode %{
22325     int vlen_enc = vector_length_encoding(this, $src1);
22326     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
          // Element width is derived from the source element type.
22327     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22328     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22329   %}
22330   ins_pipe( pipe_slow );
22331 %}
22332 
// Signed integral VectorMaskCmp with a vector result, for src1 sizes from 4
// to 32 bytes and the conditions ne, le and ge. Unlike vcmp_direct this rule
// hands a real vector temporary ($xtmp) to vpcmpCCW and marks dst as TEMP.
// NOTE(review): the name suggests these conditions are produced by negating
// a directly supported compare - confirm in vpcmpCCW.
22333 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22334   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22335             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22336             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22337             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22338             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22339             (n->in(2)->get_int() == BoolTest::ne ||
22340              n->in(2)->get_int() == BoolTest::le ||
22341              n->in(2)->get_int() == BoolTest::ge)); // cond
22342   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22343   effect(TEMP dst, TEMP xtmp);
22344   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22345   ins_encode %{
22346     int vlen_enc = vector_length_encoding(this, $src1);
22347     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
          // Element width is derived from the source element type.
22348     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22349     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22350   %}
22351   ins_pipe( pipe_slow );
22352 %}
22353 
// Unsigned integral VectorMaskCmp with a vector result, for src1 sizes from 4
// to 32 bytes. Both operands are XORed with a broadcast high_bit_set()
// constant for the element type, and the results are compared with vpcmpCCW.
// NOTE(review): presumably the XOR biases the operands so that the compare
// yields the unsigned ordering - confirm against vpcmpCCW.
22354 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22355   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22356             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22357             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22358             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22359             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22360   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22361   effect(TEMP dst, TEMP xtmp);
22362   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22363   ins_encode %{
22364     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22365     int vlen_enc = vector_length_encoding(this, $src1);
22366     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22367     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22368 
          // Replicate the constant across $xtmp; the 128-bit case uses
          // vmovddup, wider vectors use vbroadcastsd.
22369     if (vlen_enc == Assembler::AVX_128bit) {
22370       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22371     } else {
22372       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22373     }
          // dst = src1 ^ flip, xtmp = src2 ^ flip; then compare dst against
          // xtmp, reusing xtmp as the compare's temporary.
22374     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22375     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22376     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22377   %}
22378   ins_pipe( pipe_slow );
22379 %}
22380 
// Integral VectorMaskCmp on 64-byte sources whose result is a vector (not a
// predicate mask type). The compare goes into the opmask temporary $ktmp and
// is then expanded to a vector by a masked move from vector_all_bits_set().
// Only T_INT and T_LONG lanes are handled; any other element type trips the
// assert in the default case.
22381 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22382   predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
22383              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22384              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22385   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22386   effect(TEMP ktmp);
22387   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22388   ins_encode %{
22389     assert(UseAVX > 2, "required");
22390 
22391     int vlen_enc = vector_length_encoding(this, $src1);
22392     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
          // Signedness is decided by the condition constant; the compares below
          // receive its negation as their signed flag.
22393     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22394     KRegister mask = k0; // The comparison itself is not being masked.
22395     bool merge = false;
22396     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22397 
22398     switch (src1_elem_bt) {
22399       case T_INT: {
22400         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22401         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22402         break;
22403       }
22404       case T_LONG: {
22405         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22406         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22407         break;
22408       }
22409       default: assert(false, "%s", type2name(src1_elem_bt));
22410     }
22411   %}
22412   ins_pipe( pipe_slow );
22413 %}
22414 
22415 
// Integral VectorMaskCmp whose result type is a predicate mask: the compare
// writes straight into the opmask register $dst. The compare instruction is
// chosen by source element type (byte, short, int or long); any other type
// trips the assert in the default case.
22416 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22417   predicate(n->bottom_type()->isa_pvectmask() &&
22418             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22419   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22420   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22421   ins_encode %{
22422     assert(UseAVX > 2, "required");
22423     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22424 
22425     int vlen_enc = vector_length_encoding(this, $src1);
22426     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
          // Signedness is decided by the condition constant; the compares below
          // receive its negation as their signed flag.
22427     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22428     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22429 
22430     // The comparison itself is not being masked: k0 is passed as the mask.
22431     switch (src1_elem_bt) {
22432       case T_BYTE: {
22433         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22434         break;
22435       }
22436       case T_SHORT: {
22437         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22438         break;
22439       }
22440       case T_INT: {
22441         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22442         break;
22443       }
22444       case T_LONG: {
22445         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22446         break;
22447       }
22448       default: assert(false, "%s", type2name(src1_elem_bt));
22449     }
22450   %}
22451   ins_pipe( pipe_slow );
22452 %}
22453 
22454 // Extract
22455 
// ExtractI / ExtractS / ExtractB from a source vector of at most 16 bytes
// into a general-purpose register. The element is read directly from $src.
22456 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22457   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22458   match(Set dst (ExtractI src idx));
22459   match(Set dst (ExtractS src idx));
22460   match(Set dst (ExtractB src idx));
22461   format %{ "extractI $dst,$src,$idx\t!" %}
22462   ins_encode %{
22463     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22464 
          // The element type (byte, short or int) is taken from the source
          // vector rather than from the matched opcode.
22465     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22466     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22467   %}
22468   ins_pipe( pipe_slow );
22469 %}
22470 
// ExtractI / ExtractS / ExtractB from a 32- or 64-byte source vector into a
// general-purpose register. get_lane() is called first with $vtmp available
// as scratch, and the element is then read from the register it returns.
22471 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22472   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22473             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22474   match(Set dst (ExtractI src idx));
22475   match(Set dst (ExtractS src idx));
22476   match(Set dst (ExtractB src idx));
22477   effect(TEMP vtmp);
22478   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22479   ins_encode %{
22480     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22481 
22482     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
          // get_lane() returns the register that get_elem() must read from.
22483     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22484     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22485   %}
22486   ins_pipe( pipe_slow );
22487 %}
22488 
// ExtractL from a source vector of at most two long lanes into a
// general-purpose register. Asserts UseSSE >= 4.
22489 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22490   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22491   match(Set dst (ExtractL src idx));
22492   format %{ "extractL $dst,$src,$idx\t!" %}
22493   ins_encode %{
22494     assert(UseSSE >= 4, "required");
22495     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22496 
22497     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22498   %}
22499   ins_pipe( pipe_slow );
22500 %}
22501 
// ExtractL from a source vector of four or eight long lanes into a
// general-purpose register. get_lane() is called first with $vtmp available
// as scratch, and the element is then read from the register it returns.
22502 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22503   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22504             Matcher::vector_length(n->in(1)) == 8);  // src
22505   match(Set dst (ExtractL src idx));
22506   effect(TEMP vtmp);
22507   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22508   ins_encode %{
22509     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22510 
          // get_lane() returns the register that get_elem() must read from.
22511     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22512     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22513   %}
22514   ins_pipe( pipe_slow );
22515 %}
22516 
// ExtractF from a source vector of at most four float lanes into a float
// register. Both dst and $vtmp are declared TEMP, and $vtmp is passed to
// get_elem() as scratch.
22517 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22518   predicate(Matcher::vector_length(n->in(1)) <= 4);
22519   match(Set dst (ExtractF src idx));
22520   effect(TEMP dst, TEMP vtmp);
22521   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22522   ins_encode %{
22523     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22524 
22525     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22526   %}
22527   ins_pipe( pipe_slow );
22528 %}
22529 
// ExtractF from a source vector of eight or sixteen float lanes into a float
// register. get_lane() is called first with $vtmp available as scratch, and
// the element is then read from the register it returns.
22530 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22531   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22532             Matcher::vector_length(n->in(1)/*src*/) == 16);
22533   match(Set dst (ExtractF src idx));
22534   effect(TEMP vtmp);
22535   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22536   ins_encode %{
22537     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22538 
          // get_lane() returns the register that get_elem() must read from.
22539     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22540     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22541   %}
22542   ins_pipe( pipe_slow );
22543 %}
22544 
// ExtractD from a source vector of exactly two double lanes into a double
// register. The element is read directly from $src; no temporary is needed.
22545 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22546   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22547   match(Set dst (ExtractD src idx));
22548   format %{ "extractD $dst,$src,$idx\t!" %}
22549   ins_encode %{
22550     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22551 
22552     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22553   %}
22554   ins_pipe( pipe_slow );
22555 %}
22556 
// ExtractD from a source vector of four or eight double lanes into a double
// register. get_lane() is called first with $vtmp available as scratch, and
// the element is then read from the register it returns.
22557 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22558   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22559             Matcher::vector_length(n->in(1)) == 8);  // src
22560   match(Set dst (ExtractD src idx));
22561   effect(TEMP vtmp);
22562   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22563   ins_encode %{
22564     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22565 
          // get_lane() returns the register that get_elem() must read from.
22566     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22567     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22568   %}
22569   ins_pipe( pipe_slow );
22570 %}
22571 
22572 // --------------------------------- Vector Blend --------------------------------------
22573 
// VectorBlend for UseAVX == 0. This is a two-operand form: dst is both the
// first blend input and the result. The temporary is pinned to the rxmm0
// operand class because pblendvb takes its mask implicitly from xmm0.
22574 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22575   predicate(UseAVX == 0);
22576   match(Set dst (VectorBlend (Binary dst src) mask));
22577   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22578   effect(TEMP tmp);
22579   ins_encode %{
22580     assert(UseSSE >= 4, "required");
22581 
          // Copy the mask into the temporary unless the allocator already
          // placed it in that register.
22582     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22583       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22584     }
22585     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22586   %}
22587   ins_pipe( pipe_slow );
22588 %}
22589 
// VectorBlend of integral lanes with a vector (non-predicate) mask, for
// results of at most 32 bytes when AVX is enabled and EnableX86ECoreOpts is
// off. Emits a single vpblendvb.
22590 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22591   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22592             n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22593             Matcher::vector_length_in_bytes(n) <= 32 &&
22594             is_integral_type(Matcher::vector_element_basic_type(n)));
22595   match(Set dst (VectorBlend (Binary src1 src2) mask));
22596   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22597   ins_encode %{
22598     int vlen_enc = vector_length_encoding(this);
22599     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22600   %}
22601   ins_pipe( pipe_slow );
22602 %}
22603 
// VectorBlend of non-integral lanes with a vector (non-predicate) mask, for
// results of at most 32 bytes when AVX is enabled and EnableX86ECoreOpts is
// off. Emits vblendvps for every non-integral element type.
22604 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22605   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22606             n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22607             Matcher::vector_length_in_bytes(n) <= 32 &&
22608             !is_integral_type(Matcher::vector_element_basic_type(n)));
22609   match(Set dst (VectorBlend (Binary src1 src2) mask));
22610   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22611   ins_encode %{
22612     int vlen_enc = vector_length_encoding(this);
22613     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22614   %}
22615   ins_pipe( pipe_slow );
22616 %}
22617 
// VectorBlend with a vector (non-predicate) mask for results of at most
// 32 bytes when AVX is enabled and EnableX86ECoreOpts is on. Instead of a
// blend instruction it emits a vpandn / vpand / vpor sequence, for any
// element type.
22618 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22619   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22620             n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22621             Matcher::vector_length_in_bytes(n) <= 32);
22622   match(Set dst (VectorBlend (Binary src1 src2) mask));
22623   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22624   effect(TEMP vtmp, TEMP dst);
22625   ins_encode %{
22626     int vlen_enc = vector_length_encoding(this);
          // vtmp = ~mask & src1, dst = mask & src2, dst = dst | vtmp.
22627     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22628     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22629     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22630   %}
22631   ins_pipe( pipe_slow );
22632 %}
22633 
// VectorBlend of a 64-byte result with a vector (non-predicate) mask. The
// vector mask is first converted into the opmask temporary $ktmp by comparing
// it for equality with vector_all_bits_set(); the blend is then done under
// that opmask with merging enabled.
22634 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22635   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22636             n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
22637   match(Set dst (VectorBlend (Binary src1 src2) mask));
22638   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22639   effect(TEMP ktmp);
22640   ins_encode %{
22641     int vlen_enc = Assembler::AVX_512bit;
22642     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // k0 as the compare's own mask: the comparison itself is not masked.
22643     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22644     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22645   %}
22646   ins_pipe( pipe_slow );
22647 %}
22648 
22649 
// VectorBlend whose mask input is already a predicate (opmask) register. For
// subword element types the rule additionally requires AVX512BW. The blend is
// issued directly under $mask with merging enabled; no temporary is used.
22650 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22651   predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
22652             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22653              VM_Version::supports_avx512bw()));
22654   match(Set dst (VectorBlend (Binary src1 src2) mask));
22655   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22656   ins_encode %{
22657     int vlen_enc = vector_length_encoding(this);
22658     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22659     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22660   %}
22661   ins_pipe( pipe_slow );
22662 %}
22663 
22664 // --------------------------------- ABS --------------------------------------
22665 // a = |a|
// AbsVB: per-lane absolute value of packed bytes. Vectors of up to 16 lanes
// use pabsb; longer vectors use vpabsb with an explicit length encoding.
22666 instruct vabsB_reg(vec dst, vec src) %{
22667   match(Set dst (AbsVB  src));
22668   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22669   ins_encode %{
22670     uint vlen = Matcher::vector_length(this);
22671     if (vlen <= 16) {
22672       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22673     } else {
22674       int vlen_enc = vector_length_encoding(this);
22675       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22676     }
22677   %}
22678   ins_pipe( pipe_slow );
22679 %}
22680 
// AbsVS: per-lane absolute value of packed shorts. Vectors of up to 8 lanes
// use pabsw; longer vectors use vpabsw with an explicit length encoding.
22681 instruct vabsS_reg(vec dst, vec src) %{
22682   match(Set dst (AbsVS  src));
22683   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22684   ins_encode %{
22685     uint vlen = Matcher::vector_length(this);
22686     if (vlen <= 8) {
22687       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22688     } else {
22689       int vlen_enc = vector_length_encoding(this);
22690       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22691     }
22692   %}
22693   ins_pipe( pipe_slow );
22694 %}
22695 
// AbsVI: per-lane absolute value of packed ints. Vectors of up to 4 lanes
// use pabsd; longer vectors use vpabsd with an explicit length encoding.
22696 instruct vabsI_reg(vec dst, vec src) %{
22697   match(Set dst (AbsVI  src));
22698   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22699   ins_encode %{
22700     uint vlen = Matcher::vector_length(this);
22701     if (vlen <= 4) {
22702       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22703     } else {
22704       int vlen_enc = vector_length_encoding(this);
22705       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22706     }
22707   %}
22708   ins_pipe( pipe_slow );
22709 %}
22710 
// AbsVL: per-lane absolute value of packed longs via evpabsq. Asserts
// UseAVX > 2.
22711 instruct vabsL_reg(vec dst, vec src) %{
22712   match(Set dst (AbsVL  src));
22713   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22714   ins_encode %{
22715     assert(UseAVX > 2, "required");
22716     int vlen_enc = vector_length_encoding(this);
          // Without AVX512VL the instruction is always emitted with the 512-bit
          // encoding, whatever the node's own vector length.
22717     if (!VM_Version::supports_avx512vl()) {
22718       vlen_enc = Assembler::AVX_512bit;
22719     }
22720     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22721   %}
22722   ins_pipe( pipe_slow );
22723 %}
22724 
22725 // --------------------------------- ABSNEG --------------------------------------
22726 
// AbsVF / NegVF for every float vector length except 4 (see the predicate
// comment). The ideal opcode is passed through so the macro assembler can
// tell abs from neg.
22727 instruct vabsnegF(vec dst, vec src) %{
22728   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22729   match(Set dst (AbsVF src));
22730   match(Set dst (NegVF src));
22731   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22732   ins_cost(150);
22733   ins_encode %{
22734     int opcode = this->ideal_Opcode();
22735     int vlen = Matcher::vector_length(this);
          // Two lanes use the overload without a length encoding; the remaining
          // lengths (8 or 16, as asserted) pass one explicitly.
22736     if (vlen == 2) {
22737       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22738     } else {
22739       assert(vlen == 8 || vlen == 16, "required");
22740       int vlen_enc = vector_length_encoding(this);
22741       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22742     }
22743   %}
22744   ins_pipe( pipe_slow );
22745 %}
22746 
// AbsVF / NegVF for exactly four float lanes, in one-operand form: dst is
// both the input and the result, and is passed twice to vabsnegf.
22747 instruct vabsneg4F(vec dst) %{
22748   predicate(Matcher::vector_length(n) == 4);
22749   match(Set dst (AbsVF dst));
22750   match(Set dst (NegVF dst));
22751   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22752   ins_cost(150);
22753   ins_encode %{
22754     int opcode = this->ideal_Opcode();
22755     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22756   %}
22757   ins_pipe( pipe_slow );
22758 %}
22759 
// AbsVD / NegVD for packed doubles. The ideal opcode is passed through so
// the macro assembler can tell abs from neg.
22760 instruct vabsnegD(vec dst, vec src) %{
22761   match(Set dst (AbsVD  src));
22762   match(Set dst (NegVD  src));
22763   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22764   ins_encode %{
22765     int opcode = this->ideal_Opcode();
22766     uint vlen = Matcher::vector_length(this);
          // Two lanes use the overload without a length encoding; longer vectors
          // pass one explicitly.
22767     if (vlen == 2) {
22768       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22769     } else {
22770       int vlen_enc = vector_length_encoding(this);
22771       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22772     }
22773   %}
22774   ins_pipe( pipe_slow );
22775 %}
22776 
22777 //------------------------------------- VectorTest --------------------------------------------
22778 
// VectorTest on vector (XMM) operands shorter than 16 bytes; the result is
// the flags register. A vector temporary is supplied to vectortest() for
// this short case.
22779 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22780   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22781   match(Set cr (VectorTest src1 src2));
22782   effect(TEMP vtmp);
22783   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22784   ins_encode %{
22785     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
          // Note: vlen holds the size of src1 in bytes, not a lane count.
22786     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22787     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22788   %}
22789   ins_pipe( pipe_slow );
22790 %}
22791 
// VectorTest on vector (XMM) operands of 16 bytes or more; the result is the
// flags register. No temporary is needed: xnoreg is passed in the temporary
// slot of vectortest().
22792 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22793   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22794   match(Set cr (VectorTest src1 src2));
22795   format %{ "vptest_ge16  $src1, $src2\n\t" %}
22796   ins_encode %{
22797     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
          // Note: vlen holds the size of src1 in bytes, not a lane count.
22798     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22799     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22800   %}
22801   ins_pipe( pipe_slow );
22802 %}
22803 
// VectorTest on opmask operands with the BoolTest::overflow predicate, for
// masks of fewer than 8 lanes, or exactly 8 lanes when AVX512DQ is not
// available. The mask is copied to a general-purpose register and compared
// against an all-ones pattern of masklen bits. $src2 is matched but not
// referenced by the encoding.
22804 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22805   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22806              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22807             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22808   match(Set cr (VectorTest src1 src2));
22809   effect(TEMP tmp);
22810   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22811   ins_encode %{
22812     uint masklen = Matcher::vector_length(this, $src1);
22813     __ kmovwl($tmp$$Register, $src1$$KRegister);
          // Keep only the low masklen bits, then compare with the same all-ones
          // pattern; the flags are the instruction's result.
22814     __ andl($tmp$$Register, (1 << masklen) - 1);
22815     __ cmpl($tmp$$Register, (1 << masklen) - 1);
22816   %}
22817   ins_pipe( pipe_slow );
22818 %}
22819 
// VectorTest on opmask operands with the BoolTest::ne predicate, for masks
// of fewer than 8 lanes, or exactly 8 lanes when AVX512DQ is not available.
// The mask is copied to a general-purpose register and ANDed with an all-ones
// pattern of masklen bits; the flags set by that AND are the result. $src2 is
// matched but not referenced by the encoding.
22820 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22821   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22822              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22823             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22824   match(Set cr (VectorTest src1 src2));
22825   effect(TEMP tmp);
22826   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22827   ins_encode %{
22828     uint masklen = Matcher::vector_length(this, $src1);
22829     __ kmovwl($tmp$$Register, $src1$$KRegister);
22830     __ andl($tmp$$Register, (1 << masklen) - 1);
22831   %}
22832   ins_pipe( pipe_slow );
22833 %}
22834 
// VectorTest on kReg mask operands with 16 or more lanes, or with exactly
// 8 lanes when AVX512DQ is available.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8  $src1, $src2\n\t" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this, $src1);
    const KRegister mask = $src1$$KRegister;
    // The first mask operand is passed for both kortest inputs.
    __ kortest(lane_cnt, mask, mask);
  %}
  ins_pipe( pipe_slow );
%}
22846 
22847 //------------------------------------- LoadMask --------------------------------------------
22848 
// VectorLoadMask producing a vector-register mask (result type is not a
// predicate mask) on targets without AVX512VL+BW.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    // Both properties describe the result vector of this node.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, size_in_bytes, lane_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
22861 
// VectorLoadMask producing a kReg predicate mask on targets without
// AVX512VL+BW. The helper is always invoked at 512-bit vector length.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    const KRegister   mask_dst = $dst$$KRegister;
    const XMMRegister src_reg  = $src$$XMMRegister;
    const XMMRegister tmp_reg  = $xtmp$$XMMRegister;
    __ load_vector_mask(mask_dst, src_reg, tmp_reg, true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
22873 
// VectorLoadMask producing a kReg predicate mask when AVX512VL+BW is
// available. The vector length encoding is derived from the input node.
instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    const int src_vlen_enc = vector_length_encoding(in(1));
    const KRegister   mask_dst = $dst$$KRegister;
    const XMMRegister src_reg  = $src$$XMMRegister;
    const XMMRegister tmp_reg  = $xtmp$$XMMRegister;
    __ load_vector_mask(mask_dst, src_reg, tmp_reg, false, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22886 
22887 //------------------------------------- StoreMask --------------------------------------------
22888 
// VectorStoreMask for 1-byte elements from a vector-register mask with fewer
// than 64 lanes. A byte-wise absolute value is the whole conversion; the
// SSE form is used for up to 16 lanes when UseAVX <= 2, the AVX form
// otherwise.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    if (lane_cnt > 16 || UseAVX > 2) {
      assert(UseAVX > 0, "required");
      const int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    } else {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22906 
// VectorStoreMask for 2-byte elements from a vector-register mask with at
// most 16 lanes. Up to 8 lanes use the SSE sequence with a zeroed scratch
// register; larger vectors use the AVX sequence.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const XMMRegister dst_reg  = $dst$$XMMRegister;
    const XMMRegister src_reg  = $src$$XMMRegister;
    const XMMRegister zero_reg = $xtmp$$XMMRegister;
    const int lane_cnt = Matcher::vector_length(this);
    if (lane_cnt > 8) {
      assert(UseAVX > 0, "required");
      const int vlen_enc = Assembler::AVX_128bit;
      // Upper 128-bit half goes to dst, then both halves are packed together.
      __ vextracti128(dst_reg, src_reg, 0x1);
      __ vpacksswb(dst_reg, src_reg, dst_reg, vlen_enc);
      __ vpabsb(dst_reg, dst_reg, vlen_enc);
    } else {
      assert(UseSSE >= 3, "required");
      __ pxor(zero_reg, zero_reg);
      __ pabsw(dst_reg, src_reg);
      __ packuswb(dst_reg, zero_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22929 
// VectorStoreMask for 4-byte elements from a vector-register mask with at
// most 8 lanes when UseAVX <= 2. Up to 4 lanes use the SSE sequence; larger
// vectors use the AVX sequence. Both use a zeroed scratch register.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const XMMRegister dst_reg  = $dst$$XMMRegister;
    const XMMRegister src_reg  = $src$$XMMRegister;
    const XMMRegister zero_reg = $xtmp$$XMMRegister;
    const int lane_cnt = Matcher::vector_length(this);
    if (lane_cnt > 4) {
      assert(UseAVX > 0, "required");
      const int vlen_enc = Assembler::AVX_128bit;
      __ vpxor(zero_reg, zero_reg, zero_reg, vlen_enc);
      // Upper 128-bit half goes to dst, then both halves are packed together.
      __ vextracti128(dst_reg, src_reg, 0x1);
      __ vpackssdw(dst_reg, src_reg, dst_reg, vlen_enc);
      __ vpacksswb(dst_reg, dst_reg, zero_reg, vlen_enc);
      __ vpabsb(dst_reg, dst_reg, vlen_enc);
    } else {
      assert(UseSSE >= 3, "required");
      __ pxor(zero_reg, zero_reg);
      __ pabsd(dst_reg, src_reg);
      __ packusdw(dst_reg, zero_reg);
      __ packuswb(dst_reg, zero_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22955 
// VectorStoreMask for 8-byte elements, 2 lanes, when UseAVX <= 2.
// SSE-only sequence using a zeroed scratch register for the packs.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    assert(UseSSE >= 3, "required");
    const XMMRegister dst_reg  = $dst$$XMMRegister;
    const XMMRegister src_reg  = $src$$XMMRegister;
    const XMMRegister zero_reg = $xtmp$$XMMRegister;
    __ pxor(zero_reg, zero_reg);
    // Shuffle the source into dst, then narrow in place.
    __ pshufd(dst_reg, src_reg, 0x8);
    __ pabsd(dst_reg, dst_reg);
    __ packusdw(dst_reg, zero_reg);
    __ packuswb(dst_reg, zero_reg);
  %}
  ins_pipe( pipe_slow );
%}
22971 
// VectorStoreMask for 8-byte elements, 4 lanes, when UseAVX <= 2.
// The source is shuffled at 256-bit length, folded into 128 bits with the
// help of vtmp, and then narrowed with two pack steps followed by a
// byte-wise absolute value. vtmp is first a holder for the upper half and
// is then cleared to serve as the second pack input.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    // Everything after the initial shuffle operates at 128-bit length.
    int vlen_enc = Assembler::AVX_128bit;
    // NOTE(review): immediate 0x88 presumably selects the low dword of each
    // 8-byte lane - confirm against the vshufps encoding.
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    // Bring the upper 128-bit half of the shuffled value into vtmp.
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    // Combine the lower half in dst with the extracted upper half.
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    // vtmp is no longer needed as data; zero it for use by the packs below.
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22989 
// VectorStoreMask for 4-byte elements from a vector-register mask when
// UseAVX > 2: down-convert with evpmovdb, then byte-wise absolute value.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int dst_vlen_enc    = vector_length_encoding(this);
    // Without AVX512VL the down-convert is issued at 512-bit length.
    const int src_vlen_enc = VM_Version::supports_avx512vl() ? natural_src_enc
                                                             : Assembler::AVX_512bit;
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ evpmovdb(dst_reg, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb(dst_reg, dst_reg, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23005 
// VectorStoreMask for 8-byte elements from a vector-register mask when
// UseAVX > 2: down-convert with evpmovqb, then byte-wise absolute value.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int dst_vlen_enc    = vector_length_encoding(this);
    // Without AVX512VL the down-convert is issued at 512-bit length.
    const int src_vlen_enc = VM_Version::supports_avx512vl() ? natural_src_enc
                                                             : Assembler::AVX_512bit;
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ evpmovqb(dst_reg, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb(dst_reg, dst_reg, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23021 
// VectorStoreMask from a kReg predicate mask on targets without AVX512VL+BW.
// Asserts a 64-byte mask vector; a masked load from the
// vector_int_mask_cmp_bits constant is followed by a dword-to-byte
// down-convert, both at 512-bit length.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    const int vlen_enc = Assembler::AVX_512bit;
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ evmovdqul(dst_reg, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, vlen_enc, noreg);
    __ evpmovdb(dst_reg, dst_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23035 
// VectorStoreMask from a kReg predicate mask when AVX512VL+BW is available:
// expand the mask to bytes with evpmovm2b, then byte-wise absolute value.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ evpmovm2b(dst_reg, $mask$$KRegister, vlen_enc);
    __ vpabsb(dst_reg, dst_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23048 
// VectorMaskCast on a kReg mask. The operand is both input and result and
// the rule costs nothing: no machine code is generated.
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // intentionally emits nothing
  %}
  ins_pipe(empty);
%}
23058 
// VectorMaskCast on a vector-register mask whose byte length is unchanged
// by the cast. The operand is both input and result and the rule costs
// nothing: no machine code is generated.
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // intentionally emits nothing
  %}
  ins_pipe(empty);
%}
23069 
// VectorMaskCast on a vector-register mask whose byte length differs between
// input and result; the conversion is delegated to vector_mask_cast.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    // Lane count and destination type come from this node, source type from the input.
    const BasicType to_bt   = Matcher::vector_element_basic_type(this);
    const BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
    const int lane_cnt = Matcher::vector_length(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, to_bt, from_bt, lane_cnt);
  %}
  ins_pipe(pipe_slow);
%}
23082 
23083 //-------------------------------- Load Iota Indices ----------------------------------
23084 
// VectorLoadConst with a zero immediate: loads the iota index vector for the
// element type and byte length of the result.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_iota_indices($dst$$XMMRegister, size_in_bytes, elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
23095 
// PopulateIndex with an int start value and a step constant of 1:
// the start value is broadcast into a scratch vector and added to the
// iota indices loaded into dst.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const int vlen_enc      = vector_length_encoding(this);
    const XMMRegister dst_reg   = $dst$$XMMRegister;
    const XMMRegister start_reg = $vtmp$$XMMRegister;
    __ vpbroadcast(lane_bt, start_reg, $src1$$Register, vlen_enc);
    __ load_iota_indices(dst_reg, size_in_bytes, lane_bt);
    __ vpadd(lane_bt, dst_reg, dst_reg, start_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23111 
// PopulateIndex with a long start value and a step constant of 1:
// the start value is broadcast into a scratch vector and added to the
// iota indices loaded into dst.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const int vlen_enc      = vector_length_encoding(this);
    const XMMRegister dst_reg   = $dst$$XMMRegister;
    const XMMRegister start_reg = $vtmp$$XMMRegister;
    __ vpbroadcast(lane_bt, start_reg, $src1$$Register, vlen_enc);
    __ load_iota_indices(dst_reg, size_in_bytes, lane_bt);
    __ vpadd(lane_bt, dst_reg, dst_reg, start_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23127 
23128 //-------------------------------- Rearrange ----------------------------------
23129 
23130 // LoadShuffle/Rearrange for Byte
// In-place byte rearrange for vectors with fewer than 32 lanes, done with a
// single pshufb.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister data_reg    = $dst$$XMMRegister;
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    __ pshufb(data_reg, shuffle_reg);
  %}
  ins_pipe( pipe_slow );
%}
23142 
// Byte rearrange for 32-lane vectors when AVX512_VBMI is not available.
// The source is shuffled twice, once as-is and once with its 128-bit halves
// swapped, and the two results are blended using a mask derived from the
// shuffle indices. All vector instructions are issued at 256-bit length.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    // (the mask is shuffle plus the vector_byte_shufflemask constant, kept in vtmp2)
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
23164 
23165 
// Byte rearrange for vectors with more than 32 lanes when AVX512_VBMI is not
// available; the work is delegated to rearrange_bytes, which is given three
// scratch vectors, a scratch GPR and a scratch mask register.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    const XMMRegister dst_reg     = $dst$$XMMRegister;
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    const XMMRegister src_reg     = $src$$XMMRegister;
    const XMMRegister tmp1_reg    = $xtmp1$$XMMRegister;
    const XMMRegister tmp2_reg    = $xtmp2$$XMMRegister;
    const XMMRegister tmp3_reg    = $xtmp3$$XMMRegister;
    __ rearrange_bytes(dst_reg, shuffle_reg, src_reg, tmp1_reg, tmp2_reg, tmp3_reg,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23180 
// Byte rearrange for vectors with 32 or more lanes when AVX512_VBMI is
// available: a single vpermb.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    const XMMRegister src_reg     = $src$$XMMRegister;
    __ vpermb($dst$$XMMRegister, shuffle_reg, src_reg, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23192 
23193 // LoadShuffle/Rearrange for Short
23194 
// VectorLoadShuffle for short elements when AVX512BW is not available.
// Converts each short shuffle index into a pair of byte indices so that a
// byte shuffle can perform the rearrange. There is one SSE path
// (UseAVX == 0, at most 16 bytes) and one AVX path; both follow the same
// three steps: scale, duplicate, add the vector_short_shufflemask constant.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      // (vtmp is reused here to hold the constant loaded from memory)
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      // Vectors wider than 16 bytes are only accepted here with UseAVX > 1.
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      // (the constant is used directly as a memory operand on this path)
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
23235 
// In-place short rearrange for vectors with at most 8 lanes when AVX512BW is
// not available, done with a single pshufb.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister data_reg    = $dst$$XMMRegister;
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    __ pshufb(data_reg, shuffle_reg);
  %}
  ins_pipe( pipe_slow );
%}
23247 
// Short rearrange for 16-lane vectors when AVX512BW is not available.
// The source is byte-shuffled twice, once as-is and once with its 128-bit
// halves swapped, and the two results are blended using a mask derived from
// the shuffle indices. All vector instructions are issued at 256-bit length.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    // (the mask is shuffle plus the vector_byte_shufflemask constant, kept in vtmp2)
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
23269 
// Short rearrange when AVX512BW is available: a single vpermw.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int natural_enc = vector_length_encoding(this);
    // Without AVX512VL the permute is issued at 512-bit length.
    const int vlen_enc = VM_Version::supports_avx512vl() ? natural_enc
                                                         : Assembler::AVX_512bit;
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23284 
23285 // LoadShuffle/Rearrange for Integer and Float
23286 
// VectorLoadShuffle for 4-lane int/float vectors when UseAVX == 0.
// Converts each int shuffle index into four byte indices so that a byte
// shuffle can perform the rearrange. Note that the running value lives in
// vtmp until the last step; dst is used as scratch and only receives the
// final result in the closing add.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    // (dst is scratch here; the combined value is accumulated in vtmp)
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    // (the vector_int_shufflemask constant is loaded into dst, then vtmp is added)
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23316 
// In-place int/float rearrange when UseAVX == 0, done with a single pshufb.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister data_reg    = $dst$$XMMRegister;
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    __ pshufb(data_reg, shuffle_reg);
  %}
  ins_pipe( pipe_slow );
%}
23328 
// Int/float rearrange when UseAVX > 0; delegated to
// vector_rearrange_int_float with the element type of the result.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    const XMMRegister src_reg     = $src$$XMMRegister;
    __ vector_rearrange_int_float(elem_bt, $dst$$XMMRegister, shuffle_reg, src_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23341 
23342 // LoadShuffle/Rearrange for Long and Double
23343 
// VectorLoadShuffle for long/double vectors with fewer than 8 lanes when
// AVX512VL is not available. Converts each long shuffle index into a pair of
// double word indices so that a double word permute can perform the
// rearrange: scale, duplicate, then add the vector_long_shufflemask constant.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    // (shifted copy in dst is or-ed with the unshifted value kept in vtmp)
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23369 
// Long/double rearrange for vectors with fewer than 8 lanes when AVX512VL is
// not available: a single vpermd driven by the shuffle operand.
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const XMMRegister shuffle_reg = $shuffle$$XMMRegister;
    const XMMRegister src_reg     = $src$$XMMRegister;
    __ vpermd($dst$$XMMRegister, shuffle_reg, src_reg, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23383 
// Long/double rearrange for 8-lane vectors, or any length when AVX512VL is
// available: a single vpermq.
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int natural_enc = vector_length_encoding(this);
    // A 128-bit encoding is widened to 256 bits for the permute.
    const int vlen_enc = (natural_enc == Assembler::AVX_128bit) ? Assembler::AVX_256bit
                                                                : natural_enc;
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23400 
23401 // --------------------------------- FMA --------------------------------------
23402 // a * b + c
23403 
// Packed float fused multiply-add with register operands; c is both the
// addend and the result (c = a * b + c).
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF  c (Binary a b)));
  ins_cost(150);
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc_reg = $c$$XMMRegister;
    const int vlen_enc = vector_length_encoding(this);
    __ vfmaf(acc_reg, $a$$XMMRegister, $b$$XMMRegister, acc_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23415 
// Packed float fused multiply-add with the second multiplicand loaded from
// memory; c is both the addend and the result. Only matched when the first
// input is wider than 8 bytes.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF  c (Binary a (LoadVector b))));
  ins_cost(150);
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc_reg = $c$$XMMRegister;
    const int vlen_enc = vector_length_encoding(this);
    __ vfmaf(acc_reg, $a$$XMMRegister, $b$$Address, acc_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23428 
// Packed double fused multiply-add with register operands; c is both the
// addend and the result (c = a * b + c).
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD  c (Binary a b)));
  ins_cost(150);
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc_reg = $c$$XMMRegister;
    const int vlen_enc = vector_length_encoding(this);
    __ vfmad(acc_reg, $a$$XMMRegister, $b$$XMMRegister, acc_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23440 
// Packed double fused multiply-add with the second multiplicand loaded from
// memory; c is both the addend and the result. Only matched when the first
// input is wider than 8 bytes.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD  c (Binary a (LoadVector b))));
  ins_cost(150);
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc_reg = $c$$XMMRegister;
    const int vlen_enc = vector_length_encoding(this);
    __ vfmad(acc_reg, $a$$XMMRegister, $b$$Address, acc_reg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23453 
23454 // --------------------------------- Vector Multiply Add --------------------------------------
23455 
// MulAddVS2VI when UseAVX == 0: two-operand pmaddwd, dst is both the first
// input and the result.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    const XMMRegister acc_reg = $dst$$XMMRegister;
    const XMMRegister rhs_reg = $src1$$XMMRegister;
    __ pmaddwd(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
23465 
// MulAddVS2VI when UseAVX > 0: three-operand vpmaddwd.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    const XMMRegister lhs_reg = $src1$$XMMRegister;
    const XMMRegister rhs_reg = $src2$$XMMRegister;
    __ vpmaddwd($dst$$XMMRegister, lhs_reg, rhs_reg, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23476 
23477 // --------------------------------- Vector Multiply Add Add ----------------------------------
23478 
// Fuses AddVI(MulAddVS2VI(src1, src2), dst) into a single evpdpwssd when
// AVX512_VNNI is available; dst is both the addend and the result.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  ins_cost(10);
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const XMMRegister lhs_reg = $src1$$XMMRegister;
    const XMMRegister rhs_reg = $src2$$XMMRegister;
    __ evpdpwssd($dst$$XMMRegister, lhs_reg, rhs_reg, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23491 
23492 // --------------------------------- PopCount --------------------------------------
23493 
// Unmasked PopCountVI / PopCountVL for element types accepted by
// is_vector_popcount_predicate. The EVEX helper is called with k0 as the
// mask register. Vector length encoding and element type are taken from the
// source operand.
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    // The ideal opcode is not needed here: the element type alone selects
    // the instruction inside the helper, so no opcode local is kept.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23507 
// Masked vector population count (PopCountVI / PopCountVL with a kReg mask)
// for the element types accepted by is_vector_popcount_predicate.
// dst is first loaded with a full copy of src, then the popcount helper is
// run under the supplied opmask.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    // Seed the destination with the source before the masked operation.
    // NOTE(review): the boolean passed as true is presumably the merge flag - confirm.
    __ evmovdquq(result, input, vec_enc);
    __ vector_popcount_integral_evex(elem_bt, result, input, $mask$$KRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23521 
// Fallback vector population count (PopCountVI / PopCountVL) used when
// is_vector_popcount_predicate rejects the element type. Delegates to the
// vector_popcount_integral helper, which needs two vector temporaries and one
// general purpose temporary; dst is also declared TEMP.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    // The ideal opcode is not consumed by the helper, so it is not fetched.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23537 
23538 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23539 
// CountTrailingZerosV for the element type / vector size combinations accepted
// by is_clz_non_subword_predicate_evex. Only one vector temp and one general
// purpose temp are supplied; the first three vector temp slots of the helper
// are passed as xnoreg and the opmask slot as k0.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  ins_cost(400);
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    // Element type and length encoding both come from the source operand.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_trailing_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                        xnoreg, xnoreg, xnoreg, $xtmp$$XMMRegister,
                                        k0, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23555 
// CountTrailingZerosV for T_SHORT elements. Requires AVX512CD and either
// AVX512VL or a 64-byte vector. Three vector temps and one general purpose
// temp are passed to the helper; its third vector temp slot is xnoreg and the
// opmask slot is k0.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  ins_cost(400);
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_trailing_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, xnoreg,
                                        $xtmp3$$XMMRegister, k0, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23572 
// CountTrailingZerosV for T_BYTE elements when VM_Version::supports_avx512vlbw().
// This variant fills every temp slot of the helper: four vector temps, one
// opmask temp and one general purpose temp.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  ins_cost(400);
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_trailing_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23588 
// CountTrailingZerosV fallback for targets without AVX512VL and vectors
// shorter than 64 bytes. Delegates to vector_count_trailing_zeros_avx with
// three vector temps and one general purpose temp.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_trailing_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23602 
23603 
23604 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23605 
// MacroLogicV with all three inputs in registers. dst is both the first input
// and the result; func is the 8-bit immediate handed to vpternlogd.
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  effect(TEMP dst);
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpternlogd(result, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23616 
// MacroLogicV with the third input folded in from memory (LoadVector src3).
// Applies only when the vector reached through n->in(1)->in(1) is wider than
// 8 bytes. dst is both the first input and the result.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  effect(TEMP dst);
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpternlogd(result, $func$$constant, $src2$$XMMRegister, $src3$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23628 
23629 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate (RotateLeftV / RotateRightV) by an 8-bit immediate count.
// The ideal opcode is forwarded to vprotate_imm so it can tell the two
// directions apart.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    int vec_enc       = vector_length_encoding(this);
    int ideal_opc     = this->ideal_Opcode();
    __ vprotate_imm(ideal_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                    $shift$$constant, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23642 
// Vector rotate (RotateLeftV / RotateRightV) with per-lane counts held in a
// vector register. The ideal opcode is forwarded to vprotate_var so it can
// tell the two directions apart.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    int vec_enc       = vector_length_encoding(this);
    int ideal_opc     = this->ideal_Opcode();
    __ vprotate_var(ideal_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                    $shift$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23655 
23656 // ---------------------------------- Masked Operations ------------------------------------
// LoadVectorMasked where the mask lives in a vector register, i.e. the type of
// input 3 is not a predicate-register vector mask. Emits vmovmask from memory
// into dst.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elem_bt, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23668 
23669 
// LoadVectorMasked where the mask is a predicate-register vector mask
// (input 3 isa_pvectmask). Emits a masked evmovdqu load under the kReg mask.
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    // NOTE(review): the boolean passed as false is presumably the merge flag - confirm.
    __ evmovdqu(elem_bt, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23681 
// StoreVectorMasked where the mask lives in a vector register (the mask input
// is not a predicate-register vector mask). A store node has no vector result
// type of its own, so element type and length encoding are read from the node
// feeding the src operand.
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* value_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elem_bt = value_node->bottom_type()->is_vect()->element_basic_type();
    int vec_enc = vector_length_encoding(value_node);
    __ vmovmask(elem_bt, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23694 
// StoreVectorMasked where the mask is a predicate-register vector mask.
// Element type and length encoding are read from the node feeding the src
// operand; the store itself is a masked evmovdqu to memory.
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* value_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vec_enc = vector_length_encoding(value_node);
    BasicType elem_bt = value_node->bottom_type()->is_vect()->element_basic_type();
    // NOTE(review): the boolean passed as true is presumably the merge flag - confirm.
    __ evmovdqu(elem_bt, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23707 
// VerifyVectorAlignment: tests addr against the immediate mask and stops the
// VM with a diagnostic message when any masked bit is set. addr itself is
// passed through unchanged; the flags register is killed by the test.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  effect(KILL cr);
  ins_encode %{
    Label aligned;
    // All masked address bits must be zero for the access to count as aligned.
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, aligned);
    // Fall-through path: at least one masked bit was set.
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(aligned);
  %}
  ins_pipe(pipe_slow);
%}
23722 
// VectorCmpMasked: compares src1 and src2 for equality under the given opmask
// and produces an int in dst.
//   - dst is preset to -1 and stays -1 when the kortestql of the inverted mask
//     with the compare result sets the carry flag (per the Intel SDM, KORTEST
//     sets CF when the OR of its operands is all ones).
//   - otherwise dst becomes the trailing-zero count of the inverted compare
//     result, i.e. the position of the lowest bit not set in it.
// Both sources are expected to share element type and vector length; this is
// asserted in debug builds.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    BasicType lane_bt = Matcher::vector_element_basic_type(this, $src1);
    int vec_enc = vector_length_encoding(this, $src1);
    Label all_done;

    Register  result   = $dst$$Register;
    KRegister eq_bits  = $ktmp1$$KRegister;
    KRegister inv_mask = $ktmp2$$KRegister;

    // inv_mask = NOT mask; result preset for the early-exit case.
    __ knotql(inv_mask, $mask$$KRegister);
    __ mov64(result, -1L);
    // eq_bits = lanes where src1 equals src2, computed under the mask.
    __ evpcmp(lane_bt, eq_bits, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vec_enc);
    __ kortestql(inv_mask, eq_bits);
    __ jccb(Assembler::carrySet, all_done);
    // Otherwise locate the lowest clear bit of the compare result.
    __ kmovql(result, eq_bits);
    __ notq(result);
    __ tzcntq(result, result);
    __ bind(all_done);
  %}
  ins_pipe( pipe_slow );
%}
23747 
23748 
// VectorMaskGen with a length held in a register. Delegates to the genmask
// macro-assembler helper, which receives the destination opmask, the length
// register and one scratch register. Flags are killed.
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  effect(TEMP temp, KILL cr);
  ins_encode %{
    KRegister out_mask = $dst$$KRegister;
    Register  scratch  = $temp$$Register;
    __ genmask(out_mask, $len$$Register, scratch);
  %}
  ins_pipe( pipe_slow );
%}
23758 
// VectorMaskGen with a compile-time constant length.
//   - len > 0: right_n_bits(len) is materialized in the temp register and
//     moved into the destination opmask.
//   - otherwise: the destination opmask is cleared by xor-ing it with itself.
// The format string lists the destination operand as well, so the assembly
// listing shows which opmask register receives the value (as the register
// variant of this rule already does).
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    if ($len$$constant > 0) {
      // Low len bits set, everything above clear.
      __ mov64($temp$$Register, right_n_bits($len$$constant));
      __ kmovql($dst$$KRegister, $temp$$Register);
    } else {
      // Non-positive length: produce an all-zero mask without using temp.
      __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
23773 
// VectorMaskToLong for a mask held in an opmask register (input 1
// isa_pvectmask). Delegates to vector_mask_operation; dst is passed both as
// the result register and as the general purpose temp. The mask size handed
// to the helper is the lane count times the element size in bytes.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  effect(TEMP dst, KILL cr);
  ins_encode %{
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int mask_bytes = lane_count * type2aelembytes(lane_bt);
    int vec_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    Register result = $dst$$Register;
    __ vector_mask_operation(ideal_opc, result, $mask$$KRegister,
                             result, lane_count, mask_bytes, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23790 
// VectorMaskToLong for a mask held in a vector register (input 1 is not a
// predicate-register vector mask). Delegates to the vector flavour of
// vector_mask_operation with one vector temp; dst doubles as the general
// purpose temp.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int vec_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    Register result = $dst$$Register;
    __ vector_mask_operation(ideal_opc, result, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             result, lane_count, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23806 
// VectorMaskToLong applied directly to a VectorStoreMask: the matcher folds
// the store-mask node away and this rule works on its vector input. Applies
// when that input is not a predicate-register vector mask. dst doubles as the
// general purpose temp of vector_mask_operation.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int vec_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    Register result = $dst$$Register;
    __ vector_mask_operation(ideal_opc, result, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             result, lane_count, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23822 
// VectorMaskTrueCount for a mask held in an opmask register. Delegates to
// vector_mask_operation with a separate general purpose temp. The mask size
// handed to the helper is the lane count times the element size in bytes.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskTrueCount mask));
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_encode %{
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int mask_bytes = lane_count * type2aelembytes(lane_bt);
    int vec_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_count, mask_bytes, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23839 
// VectorMaskTrueCount for a mask held in a vector register (input 1 is not a
// predicate-register vector mask). Delegates to the vector flavour of
// vector_mask_operation with one vector temp and one general purpose temp.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  ins_encode %{
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int vec_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23855 
// VectorMaskTrueCount applied directly to a VectorStoreMask: the store-mask
// node is folded into the match and the rule works on its vector input.
// Applies when that input is not a predicate-register vector mask.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  ins_encode %{
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int vec_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23871 
// VectorMaskFirstTrue / VectorMaskLastTrue for a mask held in an opmask
// register. Both ideal nodes share this rule; the ideal opcode is forwarded
// to vector_mask_operation so it can tell them apart. The mask size handed to
// the helper is the lane count times the element size in bytes.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_encode %{
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int mask_bytes = lane_count * type2aelembytes(lane_bt);
    int vec_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_count, mask_bytes, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23889 
// VectorMaskFirstTrue / VectorMaskLastTrue for a mask held in a vector
// register (input 1 is not a predicate-register vector mask). The ideal
// opcode is forwarded to vector_mask_operation to select the operation.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  ins_encode %{
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int vec_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23906 
// VectorMaskFirstTrue / VectorMaskLastTrue applied directly to a
// VectorStoreMask: the store-mask node is folded into the match and the rule
// works on its vector input. Applies when that input is not a
// predicate-register vector mask.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  ins_encode %{
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int vec_enc = vector_length_encoding(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23923 
23924 // --------------------------------- Compress/Expand Operations ---------------------------
// CompressV / ExpandV with a vector-register mask, for targets without
// AVX512VL and vectors of at most 32 bytes. Delegates to
// vector_compress_expand_avx2, which needs two general purpose temps and two
// vector temps (perm and xtmp). The ideal opcode selects compress vs expand.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    int ideal_opc = this->ideal_Opcode();
    __ vector_compress_expand_avx2(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister,
                                   $mask$$XMMRegister, $rtmp$$Register, $rscratch$$Register,
                                   $perm$$XMMRegister, $xtmp$$XMMRegister, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23940 
// CompressV / ExpandV with an opmask-register mask. Selected when AVX512VL is
// available or the vector is 64 bytes wide. The ideal opcode selects compress
// vs expand inside vector_compress_expand.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    int ideal_opc = this->ideal_Opcode();
    // NOTE(review): the boolean passed as false is presumably the merge flag - confirm.
    __ vector_compress_expand(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister,
                              $mask$$KRegister, false, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23954 
// CompressM: compresses an opmask-register mask into another opmask register
// through vector_mask_compress, using two general purpose temps. The input is
// asserted to be a predicate-register vector mask.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
    // Lane count comes from this node.
    const int lane_count = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister,
                            $rtmp1$$Register, $rtmp2$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
23966 
23967 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23968 
// ReverseV (bit reversal) for targets without GFNI. Delegates to
// vector_reverse_bit with two vector temps and one general purpose temp.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_bit(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                          $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23982 
// ReverseV (bit reversal) for targets with GFNI. A 64-bit constant is placed
// in the constant table and its address is handed to vector_reverse_bit_gfni
// together with one vector temp.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    // NOTE(review): presumably the bit matrix operand for the GFNI affine
    // instruction - confirm against the helper.
    InternalAddress matrix_addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                               matrix_addr, vlen_enc, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23997 
// ReverseBytesV without extra temps. Selected when AVX512BW is available or
// the vector is shorter than 64 bytes. dst is declared TEMP.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  format %{ "vector_reverse_byte $dst, $src" %}
  effect(TEMP dst);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_byte(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24010 
// ReverseBytesV for 64-byte vectors on targets without AVX512BW. Delegates to
// vector_reverse_byte64 with two vector temps and one general purpose temp.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_byte64(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                             $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24024 
24025 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24026 
// Unmasked CountLeadingZerosV for the element type / vector size combinations
// accepted by is_clz_non_subword_predicate_evex. No temps are needed: all
// vector temp slots of the helper are xnoreg, the opmask slot is k0 and the
// general purpose temp slot is noreg.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, k0, noreg, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24040 
// Masked CountLeadingZerosV for the element type / vector size combinations
// accepted by is_clz_non_subword_predicate_evex. dst is first loaded with a
// full copy of src, then the helper is run under the supplied opmask.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    // Seed the destination with the source before the masked operation.
    // NOTE(review): the boolean passed as true is presumably the merge flag - confirm.
    __ evmovdquq(result, input, vec_enc);
    __ vector_count_leading_zeros_evex(elem_bt, result, input, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24055 
// CountLeadingZerosV for T_SHORT elements. Requires AVX512CD and either
// AVX512VL or a 64-byte vector. Two vector temps are supplied; the third
// vector temp slot of the helper is xnoreg, the opmask slot is k0 and the
// general purpose temp slot is noreg.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, xnoreg,
                                       k0, noreg, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24071 
// CountLeadingZerosV for T_BYTE elements when VM_Version::supports_avx512vlbw().
// Supplies three vector temps, one opmask temp and one general purpose temp
// to the helper.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24086 
// CountLeadingZerosV for T_INT elements on targets without AVX512VL and
// vectors shorter than 64 bytes. Uses three vector temps; the general purpose
// temp slot of vector_count_leading_zeros_avx is passed as noreg.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, noreg, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24101 
// CountLeadingZerosV for every element type other than T_INT on targets
// without AVX512VL and vectors shorter than 64 bytes. Uses three vector temps
// and one general purpose temp.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24116 
24117 // ---------------------------------- Vector Masked Operations ------------------------------------
24118 
// Masked vector add, register form. Matches AddVB, AddVS, AddVI, AddVL, AddVF
// and AddVD nodes that carry a kReg mask input; dst is both the first input
// and the result. The ideal opcode of the matched node is forwarded to
// evmasked_op together with the element type.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24136 
// Masked vector add, memory form. Same node kinds as the register form, but
// the second input is a LoadVector of the src2 memory operand, which is handed
// to evmasked_op as an Address.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24154 
// Masked vector xor, register form. Matches XorV with a kReg mask input; dst
// is both the first input and the result.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24167 
// Masked vector xor, memory form. The second input is a LoadVector of the
// src2 memory operand and is passed to evmasked_op as an Address.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24180 
// Masked vector or, register form. Matches OrV with a kReg mask input; dst is
// both the first input and the result.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24193 
// Masked vector or, memory form. The second input is a LoadVector of the src2
// memory operand and is passed to evmasked_op as an Address.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24206 
// Masked vector and, register form. Matches AndV with a kReg mask input; dst
// is both the first input and the result.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24219 
// Masked vector and, memory form. The second input is a LoadVector of the
// src2 memory operand and is passed to evmasked_op as an Address.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24232 
// Masked vector subtract, register form. Matches SubVB, SubVS, SubVI, SubVL,
// SubVF and SubVD nodes that carry a kReg mask input; dst is both the first
// input and the result.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24250 
// Masked vector subtract, memory form. Same node kinds as the register form,
// but the second input is a LoadVector of the src2 memory operand, which is
// handed to evmasked_op as an Address.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24268 
// Masked vector multiply, register form. Matches MulVS, MulVI, MulVL, MulVF
// and MulVD nodes that carry a kReg mask input (no byte variant is listed);
// dst is both the first input and the result.
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24285 
// Masked vector multiply, memory form. Same node kinds as the register form,
// but the second input is a LoadVector of the src2 memory operand, which is
// handed to evmasked_op as an Address.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24302 
// Masked vector square root. Matches SqrtVF and SqrtVD with a kReg mask input.
// This is a unary operation: dst is passed to evmasked_op as the destination
// and in both source positions.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24316 
// Masked vector divide, register form. Matches DivVF and DivVD with a kReg
// mask input; dst is both the first input and the result.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24330 
// Masked vector divide, memory form. The second input is a LoadVector of the
// src2 memory operand and is passed to evmasked_op as an Address.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24344 
24345 
// Masked vector rotate by an immI8 constant. Despite the vrol name, the rule
// matches both RotateLeftV and RotateRightV; the ideal opcode of the matched
// node is forwarded to evmasked_op along with the constant.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24359 
// Masked vector rotate with the rotate amount in a vector register. Despite
// the vrol name, the rule matches both RotateLeftV and RotateRightV; the ideal
// opcode of the matched node is forwarded to evmasked_op.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24373 
// Masked vector left shift by an immI8 constant. Matches LShiftVS, LShiftVI
// and LShiftVL whose shift input is an LShiftCntV of the constant.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24388 
// Masked vector left shift with the count in a vector register, for nodes
// that are not variable shifts (is_var_shift is false). The trailing
// evmasked_op argument is false accordingly.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24404 
// Masked vector left shift with the count in a vector register, for nodes
// that are variable shifts (is_var_shift is true). The trailing evmasked_op
// argument is true accordingly.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24420 
// Masked vector right shift (RShiftV family) by an immI8 constant. Matches
// RShiftVS, RShiftVI and RShiftVL whose shift input is an RShiftCntV of the
// constant.
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24435 
// Masked vector right shift (RShiftV family) with the count in a vector
// register, for nodes that are not variable shifts (is_var_shift is false).
// The trailing evmasked_op argument is false accordingly.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24451 
// Masked vector right shift (RShiftV family) with the count in a vector
// register, for nodes that are variable shifts (is_var_shift is true). The
// trailing evmasked_op argument is true accordingly.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24467 
// Masked vector right shift (URShiftV family) by an immI8 constant. Matches
// URShiftVS, URShiftVI and URShiftVL whose shift input is an RShiftCntV of the
// constant.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24482 
// Masked vector right shift (URShiftV family) with the count in a vector
// register, for nodes that are not variable shifts (is_var_shift is false).
// The trailing evmasked_op argument is false accordingly.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24498 
// Masked vector right shift (URShiftV family) with the count in a vector
// register, for nodes that are variable shifts (is_var_shift is true). The
// trailing evmasked_op argument is true accordingly.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24514 
// Masked vector max, register form. Matches MaxV with a kReg mask input; dst
// is both the first input and the result.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24527 
// Masked vector max, memory form. The second input is a LoadVector of the
// src2 memory operand and is passed to evmasked_op as an Address.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24540 
// Masked vector min, register form. Matches MinV with a kReg mask input; dst
// is both the first input and the result.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24553 
// Masked vector min, memory form. The second input is a LoadVector of the
// src2 memory operand and is passed to evmasked_op as an Address.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24566 
// Masked VectorRearrange, register form; dst is both the first input and the
// result. Note that the boolean passed before the length encoding is false
// here, whereas the other evmasked_op rules in this section pass true.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   false, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24579 
// Masked vector absolute value. Matches AbsVB, AbsVS, AbsVI and AbsVL with a
// kReg mask input. This is a unary operation: dst is passed to evmasked_op as
// the destination and in both source positions.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24595 
// Masked vector fused multiply-add, register form. Matches FmaVF and FmaVD
// with inputs (dst, src2) and (src3, mask). The encoding asserts UseFMA and
// passes dst, src2 and src3 to evmasked_op.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24610 
// Masked vector fused multiply-add, memory form. Matches FmaVF and FmaVD where
// the third input is a LoadVector of the src3 memory operand, which is passed
// to evmasked_op as an Address. The encoding asserts UseFMA.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24625 
// Masked vector compare producing an opmask register result. Matches
// VectorMaskCmp with inputs (src1, src2) and (cond, mask). The element type
// and length encoding are taken from src1. Integral element types go through
// evpcmpb/w/d/q with a flag that is the negation of the unsigned-condition
// query; float and double go through evcmpps/evcmppd with an FP predicate.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    const int vec_enc = vector_length_encoding(this, $src1);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Pick the compare instruction from the element type of the first source.
    switch (elem_bt) {
      case T_BYTE: {
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        break;
      }
      case T_SHORT: {
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        break;
      }
      case T_INT: {
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        break;
      }
      case T_LONG: {
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        break;
      }
      case T_FLOAT: {
        const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      case T_DOUBLE: {
        const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      // Any other element type is unexpected; report its name in debug builds.
      default: assert(false, "%s", type2name(elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
24675 
// MaskAll from a 32-bit general purpose register into an opmask register, for
// masks of at most 32 lanes. The lane count is forwarded to
// vector_maskall_operation.
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_cnt);
  %}
  ins_pipe( pipe_slow );
%}
24686 
// Mask negation expressed as XorVMask of src with MaskAll of the minus-one
// immediate, for masks with fewer than 8 lanes when AVX512DQ is supported.
// This variant of knot takes an opmask temporary and a general purpose
// temporary in addition to dst and src.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    __ knot(lane_cnt, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24698 
// Mask negation expressed as XorVMask of src with MaskAll of the minus-one
// immediate. Selected for 8 lanes with AVX512DQ, for exactly 16 lanes, or for
// more than 16 lanes with AVX512BW. No temporaries are needed.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    __ knot(lane_cnt, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24711 
// VectorLongToMask producing a vector register result (the node type is not a
// predicate-register mask), for masks of at most 8 lanes. Two general purpose
// temporaries are used; xnoreg is passed in the vector temporary slot.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    // The length encoding is computed from the lane count here.
    const int enc = vector_length_encoding(lane_cnt);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              xnoreg, lane_cnt, enc);
  %}
  ins_pipe( pipe_slow );
%}
24725 
24726 
// VectorLongToMask producing a vector register result (the node type is not a
// predicate-register mask), for masks of more than 8 lanes. Uses two general
// purpose temporaries and one vector temporary, and kills the flags register.
// The encoding asserts that the mask has at most 32 lanes.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    assert(lane_cnt <= 32, "invalid mask length");
    // The length encoding is computed from the lane count here.
    const int enc = vector_length_encoding(lane_cnt);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              $xtmp1$$XMMRegister, lane_cnt, enc);
  %}
  ins_pipe( pipe_slow );
%}
24741 
// VectorLongToMask when the node type is a predicate-register mask: the long
// in src is moved into the opmask register dst with kmov.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_pvectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    // Single move from the general purpose register to the opmask register.
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24751 
// Logical and/or/xor of two opmask registers (AndVMask, OrVMask, XorVMask).
// The encoding asserts that both inputs have the same type. When the mask has
// fewer than 16 lanes and AVX512DQ is not supported, the operation is issued
// with a mask length of 16 instead. kscratch is declared as a TEMP but is not
// referenced by the encoding.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* lhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* rhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(lhs_mask->bottom_type(), rhs_mask->bottom_type()), "Mask types must be equal");
    uint lane_cnt = Matcher::vector_length(this);
    if (lane_cnt < 16 && !VM_Version::supports_avx512dq()) {
      // Widen short masks to 16 when AVX512DQ is unavailable.
      lane_cnt = 16;
    }
    __ masked_op(this->ideal_Opcode(), lane_cnt, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24768 
// Masked MacroLogicV with all vector operands in registers. $dst is both the
// first input and the result; $func is an 8-bit immediate and $mask a k
// register. Emits evpternlog for the node's element type and vector length.
24769 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24770   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24771   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24772   ins_encode %{
24773     int vlen_enc = vector_length_encoding(this);
24774     BasicType bt = Matcher::vector_element_basic_type(this);
    // NOTE(review): the literal 'true' is presumably the merge-masking flag -
    // confirm against the evpternlog declaration.
24775     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24776                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24777   %}
24778   ins_pipe( pipe_slow );
24779 %}
24780 
// Masked MacroLogicV whose third operand is a memory operand. $dst is both the
// first input and the result. Emits evpternlog with $src3 passed as an Address.
24781 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24782   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24783   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24784   ins_encode %{
24785     int vlen_enc = vector_length_encoding(this);
24786     BasicType bt = Matcher::vector_element_basic_type(this);
    // NOTE(review): the literal 'true' is presumably the merge-masking flag -
    // confirm against the evpternlog declaration.
24787     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24788                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24789   %}
24790   ins_pipe( pipe_slow );
24791 %}
24792 
// CastVV on a k register operand. The rule has size 0, cost 0 and an empty
// encoding, so it emits no machine code.
24793 instruct castMM(kReg dst)
24794 %{
24795   match(Set dst (CastVV dst));
24796 
24797   size(0);
24798   format %{ "# castVV of $dst" %}
24799   ins_encode(/* empty encoding */);
24800   ins_cost(0);
24801   ins_pipe(empty);
24802 %}
24803 
// CastVV on a 'vec' operand. The rule has size 0, cost 0 and an empty
// encoding, so it emits no machine code.
24804 instruct castVV(vec dst)
24805 %{
24806   match(Set dst (CastVV dst));
24807 
24808   size(0);
24809   format %{ "# castVV of $dst" %}
24810   ins_encode(/* empty encoding */);
24811   ins_cost(0);
24812   ins_pipe(empty);
24813 %}
24814 
// CastVV on a 'legVec' operand. The rule has size 0, cost 0 and an empty
// encoding, so it emits no machine code.
24815 instruct castVVLeg(legVec dst)
24816 %{
24817   match(Set dst (CastVV dst));
24818 
24819   size(0);
24820   format %{ "# castVV of $dst" %}
24821   ins_encode(/* empty encoding */);
24822   ins_cost(0);
24823   ins_pipe(empty);
24824 %}
24825 
// Matches IsInfiniteF. Classifies the float in $src with vfpclassss into the
// temporary k register and then copies that result into the integer $dst.
// The flags register is declared killed.
24826 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24827 %{
24828   match(Set dst (IsInfiniteF src));
24829   effect(TEMP ktmp, KILL cr);
24830   format %{ "float_class_check $dst, $src" %}
24831   ins_encode %{
    // NOTE(review): 0x18 presumably selects the +Inf and -Inf classes -
    // confirm against the VFPCLASSSS immediate encoding.
24832     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24833     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24834   %}
24835   ins_pipe(pipe_slow);
24836 %}
24837 
// Matches IsInfiniteD. Classifies the double in $src with vfpclasssd into the
// temporary k register and then copies that result into the integer $dst.
// The flags register is declared killed.
24838 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24839 %{
24840   match(Set dst (IsInfiniteD src));
24841   effect(TEMP ktmp, KILL cr);
24842   format %{ "double_class_check $dst, $src" %}
24843   ins_encode %{
    // NOTE(review): 0x18 presumably selects the +Inf and -Inf classes -
    // confirm against the VFPCLASSSD immediate encoding.
24844     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24845     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24846   %}
24847   ins_pipe(pipe_slow);
24848 %}
24849 
// Signed SaturatingAddV / SaturatingSubV for subword element types with both
// inputs in registers. Selected when the node is a SaturatingVector that is
// not unsigned. Delegates to vector_saturating_op with the ideal opcode.
24850 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24851 %{
24852   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24853             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24854   match(Set dst (SaturatingAddV src1 src2));
24855   match(Set dst (SaturatingSubV src1 src2));
24856   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24857   ins_encode %{
24858     int vlen_enc = vector_length_encoding(this);
24859     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // 'false' here mirrors the !is_unsigned() predicate (the unsigned rule passes 'true').
24860     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24861                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24862   %}
24863   ins_pipe(pipe_slow);
24864 %}
24865 
// Unsigned SaturatingAddV / SaturatingSubV for subword element types with both
// inputs in registers. Selected when the node is an unsigned SaturatingVector.
// Delegates to vector_saturating_op with the ideal opcode.
24866 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24867 %{
24868   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24869             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24870   match(Set dst (SaturatingAddV src1 src2));
24871   match(Set dst (SaturatingSubV src1 src2));
24872   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24873   ins_encode %{
24874     int vlen_enc = vector_length_encoding(this);
24875     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // 'true' here mirrors the is_unsigned() predicate (the signed rule passes 'false').
24876     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24877                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24878   %}
24879   ins_pipe(pipe_slow);
24880 %}
24881 
// Signed SaturatingAddV / SaturatingSubV for non-subword element types.
// Selected when the vector is 64 bytes long or AVX512VL is supported.
// Uses two vector temps and two k-register temps; $dst is also marked TEMP.
24882 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24883 %{
24884   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24885             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24886             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24887   match(Set dst (SaturatingAddV src1 src2));
24888   match(Set dst (SaturatingSubV src1 src2));
24889   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24890   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24891   ins_encode %{
24892     int vlen_enc = vector_length_encoding(this);
24893     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // The ideal opcode tells the helper whether this is the add or the sub form.
24894     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24895                                         $src1$$XMMRegister, $src2$$XMMRegister,
24896                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24897                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24898   %}
24899   ins_pipe(pipe_slow);
24900 %}
24901 
// Signed SaturatingAddV / SaturatingSubV for non-subword element types.
// Selected when the vector is at most 32 bytes long and AVX512VL is not
// supported. Uses four vector temps; $dst is also marked TEMP.
24902 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24903 %{
24904   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24905             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24906             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24907   match(Set dst (SaturatingAddV src1 src2));
24908   match(Set dst (SaturatingSubV src1 src2));
24909   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24910   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24911   ins_encode %{
24912     int vlen_enc = vector_length_encoding(this);
24913     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // The ideal opcode tells the helper whether this is the add or the sub form.
24914     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24915                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24916                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24917   %}
24918   ins_pipe(pipe_slow);
24919 %}
24920 
// Unsigned SaturatingAddV for non-subword element types. Selected when the
// vector is 64 bytes long or AVX512VL is supported. Uses two vector temps and
// one k-register temp; $dst is also marked TEMP.
24921 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24922 %{
24923   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24924             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24925             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24926   match(Set dst (SaturatingAddV src1 src2));
24927   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24928   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24929   ins_encode %{
24930     int vlen_enc = vector_length_encoding(this);
24931     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24932     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24933                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24934   %}
24935   ins_pipe(pipe_slow);
24936 %}
24937 
// Unsigned SaturatingAddV for non-subword element types. Selected when the
// vector is at most 32 bytes long and AVX512VL is not supported. Uses three
// vector temps; $dst is also marked TEMP.
24938 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24939 %{
24940   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24941             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24942             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24943   match(Set dst (SaturatingAddV src1 src2));
24944   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24945   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24946   ins_encode %{
24947     int vlen_enc = vector_length_encoding(this);
24948     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24949     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24950                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24951   %}
24952   ins_pipe(pipe_slow);
24953 %}
24954 
// Unsigned SaturatingSubV for non-subword element types. Selected when the
// vector is 64 bytes long or AVX512VL is supported. Only a k-register temp is
// declared; unlike the sibling rules, $dst is not marked TEMP here.
24955 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24956 %{
24957   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24958             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24959             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24960   match(Set dst (SaturatingSubV src1 src2));
24961   effect(TEMP ktmp);
24962   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24963   ins_encode %{
24964     int vlen_enc = vector_length_encoding(this);
24965     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24966     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24967                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24968   %}
24969   ins_pipe(pipe_slow);
24970 %}
24971 
// Unsigned SaturatingSubV for non-subword element types. Selected when the
// vector is at most 32 bytes long and AVX512VL is not supported. Uses two
// vector temps; $dst is also marked TEMP.
24972 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
24973 %{
24974   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24975             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24976             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24977   match(Set dst (SaturatingSubV src1 src2));
24978   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24979   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
24980   ins_encode %{
24981     int vlen_enc = vector_length_encoding(this);
24982     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24983     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24984                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
24985   %}
24986   ins_pipe(pipe_slow);
24987 %}
24988 
// Signed SaturatingAddV / SaturatingSubV for subword element types where the
// second input is a LoadVector folded into a memory operand. Delegates to
// vector_saturating_op with $src2 passed as an Address.
24989 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
24990 %{
24991   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24992             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24993   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24994   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24995   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24996   ins_encode %{
24997     int vlen_enc = vector_length_encoding(this);
24998     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // 'false' here mirrors the !is_unsigned() predicate (the unsigned rule passes 'true').
24999     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25000                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25001   %}
25002   ins_pipe(pipe_slow);
25003 %}
25004 
// Unsigned SaturatingAddV / SaturatingSubV for subword element types where the
// second input is a LoadVector folded into a memory operand. Delegates to
// vector_saturating_op with $src2 passed as an Address.
25005 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25006 %{
25007   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25008             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25009   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25010   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25011   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25012   ins_encode %{
25013     int vlen_enc = vector_length_encoding(this);
25014     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // 'true' here mirrors the is_unsigned() predicate (the signed rule passes 'false').
25015     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25016                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25017   %}
25018   ins_pipe(pipe_slow);
25019 %}
25020 
// Masked signed SaturatingAddV / SaturatingSubV for subword element types.
// $dst is both the first input and the result; $mask is a k register.
// Delegates to evmasked_saturating_op with $dst passed as destination and
// as first source.
25021 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25022   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25023             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25024   match(Set dst (SaturatingAddV (Binary dst src) mask));
25025   match(Set dst (SaturatingSubV (Binary dst src) mask));
25026   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25027   ins_encode %{
25028     int vlen_enc = vector_length_encoding(this);
25029     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // First bool: 'false' mirrors the !is_unsigned() predicate.
    // NOTE(review): second bool ('true') is presumably the merge-masking flag - confirm.
25030     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25031                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25032   %}
25033   ins_pipe( pipe_slow );
25034 %}
25035 
// Masked unsigned SaturatingAddV / SaturatingSubV for subword element types.
// $dst is both the first input and the result; $mask is a k register.
// Delegates to evmasked_saturating_op with $dst passed as destination and
// as first source.
25036 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25037   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25038             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25039   match(Set dst (SaturatingAddV (Binary dst src) mask));
25040   match(Set dst (SaturatingSubV (Binary dst src) mask));
25041   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25042   ins_encode %{
25043     int vlen_enc = vector_length_encoding(this);
25044     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // First bool: 'true' mirrors the is_unsigned() predicate.
    // NOTE(review): second bool ('true') is presumably the merge-masking flag - confirm.
25045     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25046                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25047   %}
25048   ins_pipe( pipe_slow );
25049 %}
25050 
// Masked signed SaturatingAddV / SaturatingSubV for subword element types
// where the second input is a LoadVector folded into a memory operand.
// $dst is both the first input and the result.
25051 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25052   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25053             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25054   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25055   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25056   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25057   ins_encode %{
25058     int vlen_enc = vector_length_encoding(this);
25059     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // First bool: 'false' mirrors the !is_unsigned() predicate.
    // NOTE(review): second bool ('true') is presumably the merge-masking flag - confirm.
25060     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25061                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25062   %}
25063   ins_pipe( pipe_slow );
25064 %}
25065 
// Masked unsigned SaturatingAddV / SaturatingSubV for subword element types
// where the second input is a LoadVector folded into a memory operand.
// $dst is both the first input and the result.
25066 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25067   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25068             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25069   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25070   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25071   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25072   ins_encode %{
25073     int vlen_enc = vector_length_encoding(this);
25074     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // First bool: 'true' mirrors the is_unsigned() predicate.
    // NOTE(review): second bool ('true') is presumably the merge-masking flag - confirm.
25075     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25076                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25077   %}
25078   ins_pipe( pipe_slow );
25079 %}
25080 
// Matches SelectFromTwoVector. The $index operand is both the first input and
// the destination of the rule. Delegates to select_from_two_vectors_evex for
// the node's element type and vector length.
25081 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25082 %{
25083   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25084   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25085   ins_encode %{
25086     int vlen_enc = vector_length_encoding(this);
25087     BasicType bt = Matcher::vector_element_basic_type(this);
25088     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25089   %}
25090   ins_pipe(pipe_slow);
25091 %}
25092 
// Matches ReinterpretS2HF: moves the value from the integer register $src
// into the float register $dst with evmovw.
25093 instruct reinterpretS2HF(regF dst, rRegI src)
25094 %{
25095   match(Set dst (ReinterpretS2HF src));
25096   format %{ "evmovw $dst, $src" %}
25097   ins_encode %{
25098     __ evmovw($dst$$XMMRegister, $src$$Register);
25099   %}
25100   ins_pipe(pipe_slow);
25101 %}
25102 
// Matches ReinterpretHF2S: moves the value from the float register $src into
// the integer register $dst with evmovw, then narrows $dst as T_SHORT.
25103 instruct reinterpretHF2S(rRegI dst, regF src)
25104 %{
25105   match(Set dst (ReinterpretHF2S src));
25106   format %{ "evmovw $dst, $src" %}
25107   ins_encode %{
25108     __ evmovw($dst$$Register, $src$$XMMRegister);
    // NOTE(review): presumably sign-extends the low 16 bits so $dst holds a
    // proper Java short - confirm against narrow_subword_type.
25109     __ narrow_subword_type($dst$$Register, T_SHORT);
25110   %}
25111   ins_pipe(pipe_slow);
25112 %}
25113 
// Fuses ReinterpretS2HF(ConvF2HF src) into one vcvtps2ph at 128-bit vector
// encoding, with both source and destination in float registers.
25114 instruct convF2HFAndS2HF(regF dst, regF src)
25115 %{
25116   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25117   format %{ "convF2HFAndS2HF $dst, $src" %}
25118   ins_encode %{
    // NOTE(review): immediate 0x04 is presumably the rounding-control field
    // (use MXCSR rounding) - confirm against the VCVTPS2PH encoding.
25119     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25120   %}
25121   ins_pipe(pipe_slow);
25122 %}
25123 
// Fuses ConvHF2F(ReinterpretHF2S src) into one vcvtph2ps at 128-bit vector
// encoding, with both source and destination in float registers.
25124 instruct convHF2SAndHF2F(regF dst, regF src)
25125 %{
25126   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25127   format %{ "convHF2SAndHF2F $dst, $src" %}
25128   ins_encode %{
25129     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25130   %}
25131   ins_pipe(pipe_slow);
25132 %}
25133 
// Matches SqrtHF on float-register operands and emits vsqrtsh.
25134 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25135 %{
25136   match(Set dst (SqrtHF src));
25137   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25138   ins_encode %{
25139     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25140   %}
25141   ins_pipe(pipe_slow);
25142 %}
25143 
// One rule for the scalar binary operations AddHF, DivHF, MulHF and SubHF.
// The matched ideal opcode is forwarded to efp16sh, which picks the operation.
25144 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25145 %{
25146   match(Set dst (AddHF src1 src2));
25147   match(Set dst (DivHF src1 src2));
25148   match(Set dst (MulHF src1 src2));
25149   match(Set dst (SubHF src1 src2));
25150   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25151   ins_encode %{
25152     int opcode = this->ideal_Opcode();
25153     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25154   %}
25155   ins_pipe(pipe_slow);
25156 %}
25157 
// Scalar MaxHF / MinHF when AVX10.2 is supported. Needs no temps: the ideal
// opcode and the k0 register are passed to sminmax_fp16_avx10_2.
25158 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25159 %{
25160   predicate(VM_Version::supports_avx10_2());
25161   match(Set dst (MaxHF src1 src2));
25162   match(Set dst (MinHF src1 src2));
25163 
25164   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25165   ins_encode %{
25166     int opcode = this->ideal_Opcode();
25167     __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25168   %}
25169   ins_pipe( pipe_slow );
25170 %}
25171 
// Scalar MaxHF / MinHF when AVX10.2 is not supported. Delegates to
// sminmax_fp16 with one k-register temp and two float-register temps;
// $dst is declared TEMP_DEF.
25172 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25173 %{
25174   predicate(!VM_Version::supports_avx10_2());
25175   match(Set dst (MaxHF src1 src2));
25176   match(Set dst (MinHF src1 src2));
25177   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25178 
25179   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25180   ins_encode %{
25181     int opcode = this->ideal_Opcode();
25182     __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25183                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25184   %}
25185   ins_pipe( pipe_slow );
25186 %}
25187 
// Matches FmaHF with $src2 as the first ideal input and (dst, src1) as the
// Binary pair, so $dst is both an input and the result. Per the format string
// the rule computes $dst = $dst * $src1 + $src2, emitted as vfmadd132sh.
25188 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25189 %{
25190   match(Set dst (FmaHF  src2 (Binary dst src1)));
25191   effect(DEF dst);
25192   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25193   ins_encode %{
    // Note the assembler operand order: src2 is passed before src1.
25194     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25195   %}
25196   ins_pipe( pipe_slow );
25197 %}
25198 
25199 
// Matches SqrtVHF with a register source and emits evsqrtph at the node's
// vector length encoding.
25200 instruct vector_sqrt_HF_reg(vec dst, vec src)
25201 %{
25202   match(Set dst (SqrtVHF src));
25203   format %{ "vector_sqrt_fp16 $dst, $src" %}
25204   ins_encode %{
25205     int vlen_enc = vector_length_encoding(this);
25206     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25207   %}
25208   ins_pipe(pipe_slow);
25209 %}
25210 
// Matches SqrtVHF whose input is VectorReinterpret(LoadVector mem), folding
// the load into the instruction: evsqrtph is emitted with $src as an Address.
25211 instruct vector_sqrt_HF_mem(vec dst, memory src)
25212 %{
25213   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25214   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25215   ins_encode %{
25216     int vlen_enc = vector_length_encoding(this);
25217     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25218   %}
25219   ins_pipe(pipe_slow);
25220 %}
25221 
// One rule for the vector binary operations AddVHF, DivVHF, MulVHF and SubVHF
// with register operands. The matched ideal opcode is forwarded to evfp16ph,
// which picks the operation.
25222 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25223 %{
25224   match(Set dst (AddVHF src1 src2));
25225   match(Set dst (DivVHF src1 src2));
25226   match(Set dst (MulVHF src1 src2));
25227   match(Set dst (SubVHF src1 src2));
25228   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25229   ins_encode %{
25230     int vlen_enc = vector_length_encoding(this);
25231     int opcode = this->ideal_Opcode();
25232     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25233   %}
25234   ins_pipe(pipe_slow);
25235 %}
25236 
25237 
// Memory-operand form of the vector AddVHF/DivVHF/MulVHF/SubVHF rule: the
// second input is VectorReinterpret(LoadVector mem), folded into the
// instruction by passing $src2 to evfp16ph as an Address.
25238 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25239 %{
25240   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25241   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25242   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25243   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25244   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25245   ins_encode %{
25246     int vlen_enc = vector_length_encoding(this);
25247     int opcode = this->ideal_Opcode();
25248     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25249   %}
25250   ins_pipe(pipe_slow);
25251 %}
25252 
// Matches FmaVHF with $src2 as the first ideal input and (dst, src1) as the
// Binary pair, so $dst is both an input and the result. Per the format string
// the rule computes $dst = $dst * $src1 + $src2, emitted as evfmadd132ph.
25253 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25254 %{
25255   match(Set dst (FmaVHF src2 (Binary dst src1)));
25256   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25257   ins_encode %{
25258     int vlen_enc = vector_length_encoding(this);
    // Note the assembler operand order: src2 is passed before src1.
25259     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25260   %}
25261   ins_pipe( pipe_slow );
25262 %}
25263 
// Memory-operand form of the vector FmaVHF rule: $src1 is
// VectorReinterpret(LoadVector mem) and is passed to evfmadd132ph as an
// Address. $dst is both an input and the result.
25264 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25265 %{
25266   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25267   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25268   ins_encode %{
25269     int vlen_enc = vector_length_encoding(this);
    // Note the assembler operand order: src2 is passed before the src1 address.
25270     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25271   %}
25272   ins_pipe( pipe_slow );
25273 %}
25274 
// Vector MinVHF / MaxVHF when AVX10.2 is supported and the second input is
// VectorReinterpret(LoadVector mem). The ideal opcode, the $src2 Address and
// the k0 register are passed to vminmax_fp16_avx10_2.
25275 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25276 %{
25277   predicate(VM_Version::supports_avx10_2());
25278   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25279   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25280   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25281   ins_encode %{
25282     int vlen_enc = vector_length_encoding(this);
25283     int opcode = this->ideal_Opcode();
25284     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25285                             k0, vlen_enc);
25286   %}
25287   ins_pipe( pipe_slow );
25288 %}
25289 
// Vector MinVHF / MaxVHF when AVX10.2 is supported, with register operands.
// Needs no temps: the ideal opcode and the k0 register are passed to
// vminmax_fp16_avx10_2.
25290 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25291 %{
25292   predicate(VM_Version::supports_avx10_2());
25293   match(Set dst (MinVHF src1 src2));
25294   match(Set dst (MaxVHF src1 src2));
25295   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25296   ins_encode %{
25297     int vlen_enc = vector_length_encoding(this);
25298     int opcode = this->ideal_Opcode();
25299     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25300                             k0, vlen_enc);
25301   %}
25302   ins_pipe( pipe_slow );
25303 %}
25304 
// Vector MinVHF / MaxVHF when AVX10.2 is not supported. Delegates to
// vminmax_fp16 with one k-register temp and two vector temps; $dst is
// declared TEMP_DEF.
25305 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25306 %{
25307   predicate(!VM_Version::supports_avx10_2());
25308   match(Set dst (MinVHF src1 src2));
25309   match(Set dst (MaxVHF src1 src2));
25310   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25311   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25312   ins_encode %{
25313     int vlen_enc = vector_length_encoding(this);
25314     int opcode = this->ideal_Opcode();
    // NOTE(review): $src2 is passed before $src1 here, whereas the scalar rule
    // scalar_minmax_HF_reg passes src1 first - confirm the swap is intended.
25315     __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25316                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25317   %}
25318   ins_pipe( pipe_slow );
25319 %}
25320 
25321 //----------PEEPHOLE RULES-----------------------------------------------------
25322 // These must follow all instruction definitions as they use the names
25323 // defined in the instructions definitions.
25324 //
25325 // peeppredicate ( rule_predicate );
25326 // // the predicate that must hold; otherwise the peephole rule is ignored
25327 //
25328 // peepmatch ( root_instr_name [preceding_instruction]* );
25329 //
25330 // peepprocedure ( procedure_name );
25331 // // provide a procedure name to perform the optimization. The procedure should
25332 // // reside in the architecture dependent peephole file. The method has the
25333 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
25334 // // with the arguments being the basic block, the current node index inside the
25335 // // block, the register allocator, the functions that, when invoked, return a
25336 // // new node defined in peepreplace, and the rules of the nodes appearing in the
25337 // // corresponding peepmatch. The function returns true if successful, else
25338 // // it returns false
25339 //
25340 // peepconstraint %{
25341 // (instruction_number.operand_name relational_op instruction_number.operand_name
25342 //  [, ...] );
25343 // // instruction numbers are zero-based using left to right order in peepmatch
25344 //
25345 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25346 // // provide an instruction_number.operand_name for each operand that appears
25347 // // in the replacement instruction's match rule
25348 //
25349 // ---------VM FLAGS---------------------------------------------------------
25350 //
25351 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25352 //
25353 // Each peephole rule is given an identifying number starting with zero and
25354 // increasing by one in the order seen by the parser.  An individual peephole
25355 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25356 // on the command-line.
25357 //
25358 // ---------CURRENT LIMITATIONS----------------------------------------------
25359 //
25360 // Only transformations inside a basic block (do we need more for peephole)
25361 //
25362 // ---------EXAMPLE----------------------------------------------------------
25363 //
25364 // // pertinent parts of existing instructions in architecture description
25365 // instruct movI(rRegI dst, rRegI src)
25366 // %{
25367 //   match(Set dst (CopyI src));
25368 // %}
25369 //
25370 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25371 // %{
25372 //   match(Set dst (AddI dst src));
25373 //   effect(KILL cr);
25374 // %}
25375 //
25376 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25377 // %{
25378 //   match(Set dst (AddI dst src));
25379 // %}
25380 //
25381 // 1. Simple replacement
25382 // - Only match adjacent instructions in same basic block
25383 // - Only equality constraints
25384 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25385 // - Only one replacement instruction
25386 //
25387 // // Change (inc mov) to lea
25388 // peephole %{
25389 //   // lea should only be emitted when beneficial
25390 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25391 //   // increment preceded by register-register move
25392 //   peepmatch ( incI_rReg movI );
25393 //   // require that the destination register of the increment
25394 //   // match the destination register of the move
25395 //   peepconstraint ( 0.dst == 1.dst );
25396 //   // construct a replacement instruction that sets
25397 //   // the destination to ( move's source register + one )
25398 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25399 // %}
25400 //
25401 // 2. Procedural replacement
25402 // - More flexible finding relevant nodes
25403 // - More flexible constraints
25404 // - More flexible transformations
25405 // - May utilise architecture-dependent API more effectively
25406 // - Currently only one replacement instruction due to adlc parsing capabilities
25407 //
25408 // // Change (inc mov) to lea
25409 // peephole %{
25410 //   // lea should only be emitted when beneficial
25411 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25412 //   // the rule numbers of these nodes inside are passed into the function below
25413 //   peepmatch ( incI_rReg movI );
25414 //   // the method that takes the responsibility of transformation
25415 //   peepprocedure ( inc_mov_to_lea );
25416 //   // the replacement is a leaI_rReg_immI; a lambda that creates this node
25417 //   // when invoked is passed into the function above
25418 //   peepreplace ( leaI_rReg_immI() );
25419 // %}
25420 
25421 // These instructions are not matched by the matcher but are used by the peephole
// 32-bit register + register add expressed as leal. predicate(false) keeps
// the matcher from ever selecting this rule.
25422 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25423 %{
25424   predicate(false);
25425   match(Set dst (AddI src1 src2));
25426   format %{ "leal    $dst, [$src1 + $src2]" %}
25427   ins_encode %{
25428     Register dst = $dst$$Register;
25429     Register src1 = $src1$$Register;
25430     Register src2 = $src2$$Register;
    // Use src1 as the base register unless it is rbp or r13; in that case swap
    // the roles so src2 becomes the base (presumably because rbp/r13 as base
    // need an extra displacement byte in the encoding - confirm).
25431     if (src1 != rbp && src1 != r13) {
25432       __ leal(dst, Address(src1, src2, Address::times_1));
25433     } else {
25434       assert(src2 != rbp && src2 != r13, "");
25435       __ leal(dst, Address(src2, src1, Address::times_1));
25436     }
25437   %}
25438   ins_pipe(ialu_reg_reg);
25439 %}
25440 
// 32-bit register + immediate add expressed as leal with $src1 as base and
// $src2 as displacement. predicate(false) keeps the matcher from ever
// selecting this rule.
25441 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25442 %{
25443   predicate(false);
25444   match(Set dst (AddI src1 src2));
25445   format %{ "leal    $dst, [$src1 + $src2]" %}
25446   ins_encode %{
25447     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25448   %}
25449   ins_pipe(ialu_reg_reg);
25450 %}
25451 
25452 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25453 %{
25454   predicate(false);
25455   match(Set dst (LShiftI src shift));
25456   format %{ "leal    $dst, [$src << $shift]" %}
25457   ins_encode %{
25458     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25459     Register src = $src$$Register;
25460     if (scale == Address::times_2 && src != rbp && src != r13) {
25461       __ leal($dst$$Register, Address(src, src, Address::times_1));
25462     } else {
25463       __ leal($dst$$Register, Address(noreg, src, scale));
25464     }
25465   %}
25466   ins_pipe(ialu_reg_reg);
25467 %}
25468 
25469 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25470 %{
25471   predicate(false);
25472   match(Set dst (AddL src1 src2));
25473   format %{ "leaq    $dst, [$src1 + $src2]" %}
25474   ins_encode %{
25475     Register dst = $dst$$Register;
25476     Register src1 = $src1$$Register;
25477     Register src2 = $src2$$Register;
25478     if (src1 != rbp && src1 != r13) {
25479       __ leaq(dst, Address(src1, src2, Address::times_1));
25480     } else {
25481       assert(src2 != rbp && src2 != r13, "");
25482       __ leaq(dst, Address(src2, src1, Address::times_1));
25483     }
25484   %}
25485   ins_pipe(ialu_reg_reg);
25486 %}
25487 
25488 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25489 %{
25490   predicate(false);
25491   match(Set dst (AddL src1 src2));
25492   format %{ "leaq    $dst, [$src1 + $src2]" %}
25493   ins_encode %{
25494     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25495   %}
25496   ins_pipe(ialu_reg_reg);
25497 %}
25498 
25499 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25500 %{
25501   predicate(false);
25502   match(Set dst (LShiftL src shift));
25503   format %{ "leaq    $dst, [$src << $shift]" %}
25504   ins_encode %{
25505     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25506     Register src = $src$$Register;
25507     if (scale == Address::times_2 && src != rbp && src != r13) {
25508       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25509     } else {
25510       __ leaq($dst$$Register, Address(noreg, src, scale));
25511     }
25512   %}
25513   ins_pipe(ialu_reg_reg);
25514 %}
25515 
25516 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25517 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25518 // processors with at least partial ALU support for lea
25519 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25520 // beneficial for processors with full ALU support
25521 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25522 
// int add, register + register: enabled when VM_Version reports
// supports_fast_2op_lea(). Matched addI_rReg nodes are handed to
// lea_coalesce_reg, with leaI_rReg_rReg_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}
25530 
// int add, register + immediate: enabled when VM_Version reports
// supports_fast_2op_lea(). Matched addI_rReg_imm nodes are handed to
// lea_coalesce_imm, with leaI_rReg_immI_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}
25538 
// int increment: enabled only when VM_Version reports supports_fast_3op_lea()
// or is_intel_cascade_lake(). Matched incI_rReg nodes are handed to
// lea_coalesce_imm, with leaI_rReg_immI_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}
25547 
// int decrement: enabled only when VM_Version reports supports_fast_3op_lea()
// or is_intel_cascade_lake(). Matched decI_rReg nodes are handed to
// lea_coalesce_imm, with leaI_rReg_immI_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}
25556 
// int left shift by an immI2 count: enabled when VM_Version reports
// supports_fast_2op_lea(). Matched salI_rReg_immI2 nodes are handed to
// lea_coalesce_imm, with leaI_rReg_immI2_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}
25564 
// long add, register + register: enabled when VM_Version reports
// supports_fast_2op_lea(). Matched addL_rReg nodes are handed to
// lea_coalesce_reg, with leaL_rReg_rReg_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}
25572 
// long add, register + immediate: enabled when VM_Version reports
// supports_fast_2op_lea(). Matched addL_rReg_imm nodes are handed to
// lea_coalesce_imm, with leaL_rReg_immL32_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}
25580 
// long increment: enabled only when VM_Version reports
// supports_fast_3op_lea() or is_intel_cascade_lake(). Matched incL_rReg nodes
// are handed to lea_coalesce_imm, with leaL_rReg_immL32_peep as the
// replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}
25589 
// long decrement: enabled only when VM_Version reports
// supports_fast_3op_lea() or is_intel_cascade_lake(). Matched decL_rReg nodes
// are handed to lea_coalesce_imm, with leaL_rReg_immL32_peep as the
// replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}
25598 
// long left shift by an immI2 count: enabled when VM_Version reports
// supports_fast_2op_lea(). Matched salL_rReg_immI2 nodes are handed to
// lea_coalesce_imm, with leaL_rReg_immI2_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}
25606 
// Hands matched leaPCompressedOopOffset nodes to lea_remove_redundant. The
// rule declares neither a peeppredicate nor a peepreplace.
// NOTE(review): presumably the procedure only drops a redundant lea and so
// needs no replacement node - confirm against its definition, which is not
// in this section.
peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}
25612 
// Hands matched leaP8Narrow nodes to lea_remove_redundant. The rule declares
// neither a peeppredicate nor a peepreplace.
// NOTE(review): presumably the procedure only drops a redundant lea and so
// needs no replacement node - confirm against its definition, which is not
// in this section.
peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}
25618 
// Hands matched leaP32Narrow nodes to lea_remove_redundant. The rule declares
// neither a peeppredicate nor a peepreplace.
// NOTE(review): presumably the procedure only drops a redundant lea and so
// needs no replacement node - confirm against its definition, which is not
// in this section.
peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}
25624 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
25627 
// int variant: hands matched testI_reg nodes to test_may_remove. The rule
// declares neither a peeppredicate nor a peepreplace.
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}
25634 
// long variant: hands matched testL_reg nodes to test_may_remove. The rule
// declares neither a peeppredicate nor a peepreplace.
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}
25641 
25642 
25643 //----------SMARTSPILL RULES---------------------------------------------------
25644 // These must follow all instruction definitions as they use the names
25645 // defined in the instructions definitions.