1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
   70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
   71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
   72 
   73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
   74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
   75 
   76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
   77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
   78 
   79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
   80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
   81 
   82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
   83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
   84 
   85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
   86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
   87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
   89 #ifdef _WIN64
   90 
   91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
   92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
   93 
   94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
   95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
   96 
   97 #else
   98 
   99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
  100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
  101 
  102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
  103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
  104 
  105 #endif
  106 
  107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
  108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
  109 
  110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
  111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
  112 
  113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
  114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  115 
  116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
  117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  118 
  119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
  120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
  121 
  122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
  123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
  124 
  125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
  126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
  127 
  128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
  129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
  132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
  133 
  134 reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
  135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
  136 
  137 reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
  138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
  139 
  140 reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
  141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
  142 
  143 reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
  144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
  145 
  146 reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
  147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
  148 
  149 reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
  150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
  151 
  152 reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
  153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
  154 
  155 reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
  156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
  157 
  158 reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
  159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
  160 
  161 reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
  162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
  163 
  164 reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
  165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
  166 
  167 reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
  168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
  169 
  170 reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
  171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
  172 
  173 reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
  174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
  175 
  176 reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
  177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // Floating Point Registers
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
  188 
  189 alloc_class chunk0(R10,         R10_H,
  190                    R11,         R11_H,
  191                    R8,          R8_H,
  192                    R9,          R9_H,
  193                    R12,         R12_H,
  194                    RCX,         RCX_H,
  195                    RBX,         RBX_H,
  196                    RDI,         RDI_H,
  197                    RDX,         RDX_H,
  198                    RSI,         RSI_H,
  199                    RAX,         RAX_H,
  200                    RBP,         RBP_H,
  201                    R13,         R13_H,
  202                    R14,         R14_H,
  203                    R15,         R15_H,
  204                    R16,         R16_H,
  205                    R17,         R17_H,
  206                    R18,         R18_H,
  207                    R19,         R19_H,
  208                    R20,         R20_H,
  209                    R21,         R21_H,
  210                    R22,         R22_H,
  211                    R23,         R23_H,
  212                    R24,         R24_H,
  213                    R25,         R25_H,
  214                    R26,         R26_H,
  215                    R27,         R27_H,
  216                    R28,         R28_H,
  217                    R29,         R29_H,
  218                    R30,         R30_H,
  219                    R31,         R31_H,
  220                    RSP,         RSP_H);
  221 
  222 // XMM registers.  512-bit registers or 8 words each, labeled (a)-p.
  223 // Word a in each register holds a Float, words ab hold a Double.
  224 // The whole registers are used in SSE4.2 version intrinsics,
  225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  226 // UseXMMForArrayCopy and UseSuperword flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
  236 
  237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  253 
  254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
  260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
  261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
  262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
  263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
  264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
  265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
  266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
  267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
  268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
  269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
  270 
  271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
  272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
  273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
  274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
  275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
  276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
  277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
  278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
  279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
  280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
  281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
  282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
  283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
  284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
  285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
  286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
  287 
  288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
  289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
  290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
  291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
  292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
  293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
  294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
  295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
  296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
  297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
  298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
  299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
  300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
  301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
  302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
  303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
  304 
  305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
  306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
  307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
  308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
  309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
  310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
  311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
  312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
  313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
  314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
  315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
  316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
  317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
  318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
  319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
  320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
  321 
  322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
  323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
  324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
  325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
  326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
  327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
  328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
  329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
  330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
  331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
  332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
  333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
  334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
  335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
  336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
  337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
  338 
  339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
  340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
  341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
  342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
  343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
  344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
  345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
  346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
  347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
  348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
  349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
  350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
  351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
  352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
  353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
  354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
  355 
  356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
  357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
  358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
  359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
  360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
  361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
  362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
  363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
  364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
  365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
  366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
  367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
  368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
  369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
  370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
  371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
  373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
  374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
  375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
  376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
  377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
  378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
  379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
  380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
  381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
  382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
  383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
  384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
  385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
  386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
  387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
  388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
  389 
  390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
  391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
  392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
  393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
  394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
  395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
  396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
  397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
  398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
  399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
  400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
  401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
  402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
  403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
  404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
  405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
  406 
  407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
  408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
  409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
  410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
  411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
  412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
  413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
  414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
  415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
  416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
  417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
  418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
  419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
  420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
  421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
  422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
  423 
  424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
  425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
  426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
  427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
  428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
  429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
  430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
  431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
  432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
  433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
  434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
  435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
  436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
  437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
  438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
  439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
  440 
  441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
  442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
  443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
  444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
  445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
  446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
  447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
  448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
  449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
  450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
  451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
  452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
  453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
  454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
  455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
  456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
  457 
  458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
  459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
  460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
  461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
  462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
  463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
  464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
  465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
  466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
  467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
  468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
  469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
  470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
  471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
  472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
  473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
  474 
  475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
  476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
  477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
  478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
  479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
  480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
  481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
  482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
  483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
  484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
  485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
  486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
  487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
  488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
  489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
  490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
  491 
  492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
  493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
  494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
  495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
  496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
  497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
  498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
  499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
  500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
  501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
  502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
  503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
  504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
  505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
  506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
  507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
  509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
  510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
  511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
  512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
  513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
  514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
  515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
  516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
  517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
  518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
  519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
  520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
  521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
  522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
  523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
  524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
  525 
  526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
  527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
  528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
  529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
  530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
  531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
  532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
  533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
  534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
  535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
  536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
  537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
  538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
  539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
  540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
  541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
  542 
  543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
  544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
  545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
  546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
  547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
  548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
  549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
  550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
  551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
  552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
  553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
  554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
  555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
  556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
  557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
  558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
  559 
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
  576 
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
  593 
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
  610 
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
  627 
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
  644 
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
  661 
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
  678 
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
  695 
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
  712 
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
  729 
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
  746 
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
  763 
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
  780 
  781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
  782 
  783 // AVX3 Mask Registers.
  784 reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
  785 reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());
  786 
  787 reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
  788 reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());
  789 
  790 reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
  791 reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());
  792 
  793 reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
  794 reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());
  795 
  796 reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
  797 reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());
  798 
  799 reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
  800 reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());
  801 
  802 reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
  803 reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
  812 
  813 // Empty register class.
  814 reg_class no_reg();
  815 
  816 // Class for all pointer/long registers including APX extended GPRs.
  817 reg_class all_reg(RAX, RAX_H,
  818                   RDX, RDX_H,
  819                   RBP, RBP_H,
  820                   RDI, RDI_H,
  821                   RSI, RSI_H,
  822                   RCX, RCX_H,
  823                   RBX, RBX_H,
  824                   RSP, RSP_H,
  825                   R8,  R8_H,
  826                   R9,  R9_H,
  827                   R10, R10_H,
  828                   R11, R11_H,
  829                   R12, R12_H,
  830                   R13, R13_H,
  831                   R14, R14_H,
  832                   R15, R15_H,
  833                   R16, R16_H,
  834                   R17, R17_H,
  835                   R18, R18_H,
  836                   R19, R19_H,
  837                   R20, R20_H,
  838                   R21, R21_H,
  839                   R22, R22_H,
  840                   R23, R23_H,
  841                   R24, R24_H,
  842                   R25, R25_H,
  843                   R26, R26_H,
  844                   R27, R27_H,
  845                   R28, R28_H,
  846                   R29, R29_H,
  847                   R30, R30_H,
  848                   R31, R31_H);
  849 
  850 // Class for all int registers including APX extended GPRs.
  851 reg_class all_int_reg(RAX
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14,
  865                       R16,
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
  881 
  882 // Class for all pointer registers
  883 reg_class any_reg %{
  884   return _ANY_REG_mask;
  885 %}
  886 
  887 // Class for all pointer registers (excluding RSP)
  888 reg_class ptr_reg %{
  889   return _PTR_REG_mask;
  890 %}
  891 
  892 // Class for all pointer registers (excluding RSP and RBP)
  893 reg_class ptr_reg_no_rbp %{
  894   return _PTR_REG_NO_RBP_mask;
  895 %}
  896 
  897 // Class for all pointer registers (excluding RAX and RSP)
  898 reg_class ptr_no_rax_reg %{
  899   return _PTR_NO_RAX_REG_mask;
  900 %}
  901 
  902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
  903 reg_class ptr_no_rax_rbx_reg %{
  904   return _PTR_NO_RAX_RBX_REG_mask;
  905 %}
  906 
  907 // Class for all long registers (excluding RSP)
  908 reg_class long_reg %{
  909   return _LONG_REG_mask;
  910 %}
  911 
  912 // Class for all long registers (excluding RAX, RDX and RSP)
  913 reg_class long_no_rax_rdx_reg %{
  914   return _LONG_NO_RAX_RDX_REG_mask;
  915 %}
  916 
  917 // Class for all long registers (excluding RCX and RSP)
  918 reg_class long_no_rcx_reg %{
  919   return _LONG_NO_RCX_REG_mask;
  920 %}
  921 
  922 // Class for all long registers (excluding RBP and R13)
  923 reg_class long_no_rbp_r13_reg %{
  924   return _LONG_NO_RBP_R13_REG_mask;
  925 %}
  926 
  927 // Class for all int registers (excluding RSP)
  928 reg_class int_reg %{
  929   return _INT_REG_mask;
  930 %}
  931 
  932 // Class for all int registers (excluding RAX, RDX, and RSP)
  933 reg_class int_no_rax_rdx_reg %{
  934   return _INT_NO_RAX_RDX_REG_mask;
  935 %}
  936 
  937 // Class for all int registers (excluding RCX and RSP)
  938 reg_class int_no_rcx_reg %{
  939   return _INT_NO_RCX_REG_mask;
  940 %}
  941 
  942 // Class for all int registers (excluding RBP and R13)
  943 reg_class int_no_rbp_r13_reg %{
  944   return _INT_NO_RBP_R13_REG_mask;
  945 %}
  946 
  947 // Singleton class for RAX pointer register
  948 reg_class ptr_rax_reg(RAX, RAX_H);
  949 
  950 // Singleton class for RBX pointer register
  951 reg_class ptr_rbx_reg(RBX, RBX_H);
  952 
  953 // Singleton class for RSI pointer register
  954 reg_class ptr_rsi_reg(RSI, RSI_H);
  955 
  956 // Singleton class for RBP pointer register
  957 reg_class ptr_rbp_reg(RBP, RBP_H);
  958 
  959 // Singleton class for RDI pointer register
  960 reg_class ptr_rdi_reg(RDI, RDI_H);
  961 
  962 // Singleton class for stack pointer
  963 reg_class ptr_rsp_reg(RSP, RSP_H);
  964 
  965 // Singleton class for TLS pointer
  966 reg_class ptr_r15_reg(R15, R15_H);
  967 
  968 // Singleton class for RAX long register
  969 reg_class long_rax_reg(RAX, RAX_H);
  970 
  971 // Singleton class for RCX long register
  972 reg_class long_rcx_reg(RCX, RCX_H);
  973 
  974 // Singleton class for RDX long register
  975 reg_class long_rdx_reg(RDX, RDX_H);
  976 
  977 // Singleton class for R11 long register
  978 reg_class long_r11_reg(R11, R11_H);
  979 
  980 // Singleton class for RAX int register
  981 reg_class int_rax_reg(RAX);
  982 
  983 // Singleton class for RBX int register
  984 reg_class int_rbx_reg(RBX);
  985 
  986 // Singleton class for RCX int register
  987 reg_class int_rcx_reg(RCX);
  988 
  989 // Singleton class for RDX int register
  990 reg_class int_rdx_reg(RDX);
  991 
  992 // Singleton class for RDI int register
  993 reg_class int_rdi_reg(RDI);
  994 
  995 // Singleton class for instruction pointer
  996 // reg_class ip_reg(RIP);
  997 
  998 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
  999                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1000                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1001                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1002                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1003                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1004                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1005                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1006                    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1007                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1008                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1009                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1010                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1011                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1012                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1013                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1014                    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1015                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1016                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1017                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1018                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1019                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1020                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1021                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1022                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1023                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1024                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1025                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1026                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // The flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
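      // reg_class_dynamic selects the first (EVEX) class when the predicate holds
      // at runtime and otherwise falls back to the legacy class (XMM0-XMM15).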
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
 1116 // Class for pre evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
 1226 // Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
 1244 // Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
 1281 // Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
 1299 // Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
 1336 // Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
 1354 // Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for all 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
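      // A CastLL's long range can be handled with 32-bit immediates when each bound
      // is either unbounded (min/max jlong) or fits in a signed 32-bit immediate.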
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
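      // R12 is reserved as the heap base register when compressed oops are in use,
      // so it must be kept out of the allocatable masks built below.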
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
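        // r16-r31 are the APX extended GPRs; they are removed from the allocatable
        // masks below when UseAPX is disabled.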
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
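      // Decide whether vzeroupper should be emitted: only when the CPU supports it
      // and the compiled code either uses wide (> 16 byte) vectors or needs its
      // upper AVX state cleared.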
 1626 static bool generate_vzeroupper(Compile* C) {
 1627   return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
 1628 }
 1629 
 1630 static int clear_avx_size() {
 1631   return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
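        // Typically a 10-byte movq of the inline-cache holder into rax followed by
        // a 5-byte call rel32.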
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
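      // For example, assuming a 4-byte alignment requirement: if the node starts at
      // offset 1 and a 3-byte vzeroupper precedes the call, the displacement would
      // begin at offset 5, so 3 bytes of padding are emitted to push it to offset 8.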
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;
 1683 }
 1684 
 1685 // This could be in MacroAssembler but it's fairly C2 specific
 1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1687   Label exit;
 1688   __ jccb(Assembler::noParity, exit);
 1689   __ pushf();
 1690   //
 1691   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1692   // zero OF,AF,SF for NaN values.
 1693   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1694   // values returns 'less than' result (CF is set).
 1695   // Leave the rest of flags unchanged.
 1696   //
 1697   //    7 6 5 4 3 2 1 0
 1698   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1699   //    0 0 1 0 1 0 1 1   (0x2B)
 1700   //
 1701   __ andq(Address(rsp, 0), 0xffffff2b);
 1702   __ popf();
 1703   __ bind(exit);
 1704 }
 1705 
 1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1707   // After a floating point compare, the unordered (NaN) case sets CF=1, so the
 1708   // 'below' branch is also taken when at least one input is NaN and dst keeps -1.
 1709   Label done;
 1710   __ movl(dst, -1);
 1711   __ jcc(Assembler::below, done);
 1712   __ setcc(Assembler::notEqual, dst);
 1713   __ bind(done);
 1714 }
 1715 
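      // Floating point precision selector: half, single or double precision.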
 1716 enum FP_PREC {
 1717   fp_prec_hlf,
 1718   fp_prec_flt,
 1719   fp_prec_dbl
 1720 };
 1721 
 1722 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1723                                 XMMRegister p, XMMRegister q) {
 1724   if (pt == fp_prec_hlf) {
 1725     __ evucomish(p, q);
 1726   } else if (pt == fp_prec_flt) {
 1727     __ ucomiss(p, q);
 1728   } else {
 1729     __ ucomisd(p, q);
 1730   }
 1731 }
 1732 
 1733 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1734                          XMMRegister dst, XMMRegister src, Register scratch) {
 1735   if (pt == fp_prec_hlf) {
 1736     __ movhlf(dst, src, scratch);
 1737   } else if (pt == fp_prec_flt) {
 1738     __ movflt(dst, src);
 1739   } else {
 1740     __ movdbl(dst, src);
 1741   }
 1742 }
 1743 
 1744 // Math.min()          # Math.max()
 1745 // -----------------------------
 1746 // (v)ucomis[h/s/d]    #
 1747 // ja   -> b           # a
 1748 // jp   -> NaN         # NaN
 1749 // jb   -> a           # b
 1750 // je                  #
 1751 // |-jz -> a | b       # a & b
 1752 // |    -> a           #
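      // On the equal path the signs of +/-0.0 are combined: min ORs the operands so
      // -0.0 wins, max ANDs them so +0.0 wins. The NaN path loads the canonical NaN
      // of the requested precision.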
 1753 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1754                             XMMRegister a, XMMRegister b,
 1755                             XMMRegister xmmt, Register rt,
 1756                             bool min, enum FP_PREC pt) {
 1757 
 1758   Label nan, zero, below, above, done;
 1759 
 1760   emit_fp_ucom(masm, pt, a, b);
 1761 
 1762   if (dst->encoding() != (min ? b : a)->encoding()) {
 1763     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1764   } else {
 1765     __ jccb(Assembler::above, done);
 1766   }
 1767 
 1768   __ jccb(Assembler::parity, nan);  // PF=1
 1769   __ jccb(Assembler::below, below); // CF=1
 1770 
 1771   // equal
 1772   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1773   emit_fp_ucom(masm, pt, a, xmmt);
 1774 
 1775   __ jccb(Assembler::equal, zero);
 1776   movfp(masm, pt, dst, a, rt);
 1777 
 1778   __ jmp(done);
 1779 
 1780   __ bind(zero);
 1781   if (min) {
 1782     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1783   } else {
 1784     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1785   }
 1786 
 1787   __ jmp(done);
 1788 
 1789   __ bind(above);
 1790   movfp(masm, pt, dst, min ? b : a, rt);
 1791 
 1792   __ jmp(done);
 1793 
 1794   __ bind(nan);
 1795   if (pt == fp_prec_hlf) {
 1796     __ movl(rt, 0x00007e00); // Float16.NaN
 1797     __ evmovw(dst, rt);
 1798   } else if (pt == fp_prec_flt) {
 1799     __ movl(rt, 0x7fc00000); // Float.NaN
 1800     __ movdl(dst, rt);
 1801   } else {
 1802     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1803     __ movdq(dst, rt);
 1804   }
 1805   __ jmp(done);
 1806 
 1807   __ bind(below);
 1808   movfp(masm, pt, dst, min ? a : b, rt);
 1809 
 1810   __ bind(done);
 1811 }
 1812 
 1813 //=============================================================================
 1814 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1815 
 1816 int ConstantTable::calculate_table_base_offset() const {
 1817   return 0;  // absolute addressing, no offset
 1818 }
 1819 
 1820 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1821 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1822   ShouldNotReachHere();
 1823 }
 1824 
 1825 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1826   // Empty encoding
 1827 }
 1828 
 1829 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1830   return 0;
 1831 }
 1832 
 1833 #ifndef PRODUCT
 1834 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1835   st->print("# MachConstantBaseNode (empty encoding)");
 1836 }
 1837 #endif
 1838 
 1839 
 1840 //=============================================================================
 1841 #ifndef PRODUCT
 1842 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1843   Compile* C = ra_->C;
 1844 
 1845   int framesize = C->output()->frame_size_in_bytes();
 1846   int bangsize = C->output()->bang_size_in_bytes();
 1847   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1848   // Remove wordSize for return addr which is already pushed.
 1849   framesize -= wordSize;
 1850 
 1851   if (C->output()->need_stack_bang(bangsize)) {
 1852     framesize -= wordSize;
 1853     st->print("# stack bang (%d bytes)", bangsize);
 1854     st->print("\n\t");
 1855     st->print("pushq   rbp\t# Save rbp");
 1856     if (PreserveFramePointer) {
 1857         st->print("\n\t");
 1858         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1859     }
 1860     if (framesize) {
 1861       st->print("\n\t");
 1862       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1863     }
 1864   } else {
 1865     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1866     st->print("\n\t");
 1867     framesize -= wordSize;
 1868     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1869     if (PreserveFramePointer) {
 1870       st->print("\n\t");
 1871       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1872       if (framesize > 0) {
 1873         st->print("\n\t");
 1874         st->print("addq    rbp, #%d", framesize);
 1875       }
 1876     }
 1877   }
 1878 
 1879   if (VerifyStackAtCalls) {
 1880     st->print("\n\t");
 1881     framesize -= wordSize;
 1882     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1883 #ifdef ASSERT
 1884     st->print("\n\t");
 1885     st->print("# stack alignment check");
 1886 #endif
 1887   }
 1888   if (C->stub_function() != nullptr) {
 1889     st->print("\n\t");
 1890     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1891     st->print("\n\t");
 1892     st->print("je      fast_entry\t");
 1893     st->print("\n\t");
 1894     st->print("call    #nmethod_entry_barrier_stub\t");
 1895     st->print("\n\tfast_entry:");
 1896   }
 1897   st->cr();
 1898 }
 1899 #endif
 1900 
 1901 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1902   Compile* C = ra_->C;
 1903 
 1904   __ verified_entry(C);
 1905 
 1906   if (ra_->C->stub_function() == nullptr) {
 1907     __ entry_barrier();
 1908   }
 1909 
 1910   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1911     __ bind(*_verified_entry);
 1912   }
 1913 
 1914   C->output()->set_frame_complete(__ offset());
 1915 
 1916   if (C->has_mach_constant_base_node()) {
 1917     // NOTE: We set the table base offset here because users might be
 1918     // emitted before MachConstantBaseNode.
 1919     ConstantTable& constant_table = C->output()->constant_table();
 1920     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1921   }
 1922 }
 1923 
 1924 
 1925 int MachPrologNode::reloc() const
 1926 {
 1927   return 0; // a large enough number
 1928 }
 1929 
 1930 //=============================================================================
 1931 #ifndef PRODUCT
 1932 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1933 {
 1934   Compile* C = ra_->C;
 1935   if (generate_vzeroupper(C)) {
 1936     st->print("vzeroupper");
 1937     st->cr(); st->print("\t");
 1938   }
 1939 
 1940   int framesize = C->output()->frame_size_in_bytes();
 1941   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1942   // Remove word for return addr already pushed
 1943   // and RBP
 1944   framesize -= 2*wordSize;
 1945 
 1946   if (framesize) {
 1947     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1948     st->print("\t");
 1949   }
 1950 
 1951   st->print_cr("popq    rbp");
 1952   if (do_polling() && C->is_method_compilation()) {
 1953     st->print("\t");
 1954     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1955                  "ja      #safepoint_stub\t"
 1956                  "# Safepoint: poll for GC");
 1957   }
 1958 }
 1959 #endif
 1960 
 1961 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1962 {
 1963   Compile* C = ra_->C;
 1964 
 1965   if (generate_vzeroupper(C)) {
 1966     // Clear upper bits of YMM registers when current compiled code uses
 1967     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1968     __ vzeroupper();
 1969   }
 1970 
 1971   // Subtract two words to account for return address and rbp
 1972   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1973   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1974 
 1975   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1976     __ reserved_stack_check();
 1977   }
 1978 
 1979   if (do_polling() && C->is_method_compilation()) {
 1980     Label dummy_label;
 1981     Label* code_stub = &dummy_label;
 1982     if (!C->output()->in_scratch_emit_size()) {
 1983       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1984       C->output()->add_stub(stub);
 1985       code_stub = &stub->entry();
 1986     }
 1987     __ relocate(relocInfo::poll_return_type);
 1988     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1989   }
 1990 }
 1991 
 1992 int MachEpilogNode::reloc() const
 1993 {
 1994   return 2; // a large enough number
 1995 }
 1996 
 1997 const Pipeline* MachEpilogNode::pipeline() const
 1998 {
 1999   return MachNode::pipeline_class();
 2000 }
 2001 
 2002 //=============================================================================
 2003 
 2004 enum RC {
 2005   rc_bad,
 2006   rc_int,
 2007   rc_kreg,
 2008   rc_float,
 2009   rc_stack
 2010 };
 2011 
 2012 static enum RC rc_class(OptoReg::Name reg)
 2013 {
 2014   if (!OptoReg::is_valid(reg)) return rc_bad;
 2015 
 2016   if (OptoReg::is_stack(reg)) return rc_stack;
 2017 
 2018   VMReg r = OptoReg::as_VMReg(reg);
 2019 
 2020   if (r->is_Register()) return rc_int;
 2021 
 2022   if (r->is_KRegister()) return rc_kreg;
 2023 
 2024   assert(r->is_XMMRegister(), "must be");
 2025   return rc_float;
 2026 }
 2027 
 2028 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2029 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2030                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2031 
 2032 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2033                      int stack_offset, int reg, uint ireg, outputStream* st);
 2034 
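      // Copies a vector spill slot to another stack slot. RAX (32-bit) or XMM0
      // (256/512-bit) is used as scratch and is saved/restored just below RSP;
      // 64- and 128-bit copies go through pushq/popq.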
 2035 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2036                                       int dst_offset, uint ireg, outputStream* st) {
 2037   if (masm) {
 2038     switch (ireg) {
 2039     case Op_VecS:
 2040       __ movq(Address(rsp, -8), rax);
 2041       __ movl(rax, Address(rsp, src_offset));
 2042       __ movl(Address(rsp, dst_offset), rax);
 2043       __ movq(rax, Address(rsp, -8));
 2044       break;
 2045     case Op_VecD:
 2046       __ pushq(Address(rsp, src_offset));
 2047       __ popq (Address(rsp, dst_offset));
 2048       break;
 2049     case Op_VecX:
 2050       __ pushq(Address(rsp, src_offset));
 2051       __ popq (Address(rsp, dst_offset));
 2052       __ pushq(Address(rsp, src_offset+8));
 2053       __ popq (Address(rsp, dst_offset+8));
 2054       break;
 2055     case Op_VecY:
 2056       __ vmovdqu(Address(rsp, -32), xmm0);
 2057       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2058       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2059       __ vmovdqu(xmm0, Address(rsp, -32));
 2060       break;
 2061     case Op_VecZ:
 2062       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2063       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2064       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2065       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2066       break;
 2067     default:
 2068       ShouldNotReachHere();
 2069     }
 2070 #ifndef PRODUCT
 2071   } else {
 2072     switch (ireg) {
 2073     case Op_VecS:
 2074       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2075                 "movl    rax, [rsp + #%d]\n\t"
 2076                 "movl    [rsp + #%d], rax\n\t"
 2077                 "movq    rax, [rsp - #8]",
 2078                 src_offset, dst_offset);
 2079       break;
 2080     case Op_VecD:
 2081       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2082                 "popq    [rsp + #%d]",
 2083                 src_offset, dst_offset);
 2084       break;
 2085      case Op_VecX:
 2086       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2087                 "popq    [rsp + #%d]\n\t"
 2088                 "pushq   [rsp + #%d]\n\t"
 2089                 "popq    [rsp + #%d]",
 2090                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2091       break;
 2092     case Op_VecY:
 2093       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2094                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2095                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2096                 "vmovdqu xmm0, [rsp - #32]",
 2097                 src_offset, dst_offset);
 2098       break;
 2099     case Op_VecZ:
 2100       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2101                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2102                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2103                 "vmovdqu xmm0, [rsp - #64]",
 2104                 src_offset, dst_offset);
 2105       break;
 2106     default:
 2107       ShouldNotReachHere();
 2108     }
 2109 #endif
 2110   }
 2111 }
 2112 
 2113 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2114                                        PhaseRegAlloc* ra_,
 2115                                        bool do_size,
 2116                                        outputStream* st) const {
 2117   assert(masm != nullptr || st  != nullptr, "sanity");
 2118   // Get registers to move
 2119   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2120   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2121   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2122   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2123 
 2124   enum RC src_second_rc = rc_class(src_second);
 2125   enum RC src_first_rc = rc_class(src_first);
 2126   enum RC dst_second_rc = rc_class(dst_second);
 2127   enum RC dst_first_rc = rc_class(dst_first);
 2128 
 2129   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2130          "must move at least 1 register" );
 2131 
 2132   if (src_first == dst_first && src_second == dst_second) {
 2133     // Self copy, no move
 2134     return 0;
 2135   }
 2136   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2137     uint ireg = ideal_reg();
 2138     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2139     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 2140     if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2141       // mem -> mem
 2142       int src_offset = ra_->reg2offset(src_first);
 2143       int dst_offset = ra_->reg2offset(dst_first);
 2144       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 2145     } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2146       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 2147     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2148       int stack_offset = ra_->reg2offset(dst_first);
 2149       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2150     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2151       int stack_offset = ra_->reg2offset(src_first);
 2152       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2153     } else {
 2154       ShouldNotReachHere();
 2155     }
 2156     return 0;
 2157   }
 2158   if (src_first_rc == rc_stack) {
 2159     // mem ->
 2160     if (dst_first_rc == rc_stack) {
 2161       // mem -> mem
 2162       assert(src_second != dst_first, "overlap");
 2163       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2164           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2165         // 64-bit
 2166         int src_offset = ra_->reg2offset(src_first);
 2167         int dst_offset = ra_->reg2offset(dst_first);
 2168         if (masm) {
 2169           __ pushq(Address(rsp, src_offset));
 2170           __ popq (Address(rsp, dst_offset));
 2171 #ifndef PRODUCT
 2172         } else {
 2173           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2174                     "popq    [rsp + #%d]",
 2175                      src_offset, dst_offset);
 2176 #endif
 2177         }
 2178       } else {
 2179         // 32-bit
 2180         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2181         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2182         // No pushl/popl, so:
 2183         int src_offset = ra_->reg2offset(src_first);
 2184         int dst_offset = ra_->reg2offset(dst_first);
 2185         if (masm) {
 2186           __ movq(Address(rsp, -8), rax);
 2187           __ movl(rax, Address(rsp, src_offset));
 2188           __ movl(Address(rsp, dst_offset), rax);
 2189           __ movq(rax, Address(rsp, -8));
 2190 #ifndef PRODUCT
 2191         } else {
 2192           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2193                     "movl    rax, [rsp + #%d]\n\t"
 2194                     "movl    [rsp + #%d], rax\n\t"
 2195                     "movq    rax, [rsp - #8]",
 2196                      src_offset, dst_offset);
 2197 #endif
 2198         }
 2199       }
 2200       return 0;
 2201     } else if (dst_first_rc == rc_int) {
 2202       // mem -> gpr
 2203       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2204           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2205         // 64-bit
 2206         int offset = ra_->reg2offset(src_first);
 2207         if (masm) {
 2208           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2209 #ifndef PRODUCT
 2210         } else {
 2211           st->print("movq    %s, [rsp + #%d]\t# spill",
 2212                      Matcher::regName[dst_first],
 2213                      offset);
 2214 #endif
 2215         }
 2216       } else {
 2217         // 32-bit
 2218         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2219         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2220         int offset = ra_->reg2offset(src_first);
 2221         if (masm) {
 2222           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2223 #ifndef PRODUCT
 2224         } else {
 2225           st->print("movl    %s, [rsp + #%d]\t# spill",
 2226                      Matcher::regName[dst_first],
 2227                      offset);
 2228 #endif
 2229         }
 2230       }
 2231       return 0;
 2232     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2234       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2235           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2236         // 64-bit
 2237         int offset = ra_->reg2offset(src_first);
 2238         if (masm) {
 2239           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2240 #ifndef PRODUCT
 2241         } else {
 2242           st->print("%s  %s, [rsp + #%d]\t# spill",
 2243                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2244                      Matcher::regName[dst_first],
 2245                      offset);
 2246 #endif
 2247         }
 2248       } else {
 2249         // 32-bit
 2250         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2251         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2252         int offset = ra_->reg2offset(src_first);
 2253         if (masm) {
 2254           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2255 #ifndef PRODUCT
 2256         } else {
 2257           st->print("movss   %s, [rsp + #%d]\t# spill",
 2258                      Matcher::regName[dst_first],
 2259                      offset);
 2260 #endif
 2261         }
 2262       }
 2263       return 0;
 2264     } else if (dst_first_rc == rc_kreg) {
 2265       // mem -> kreg
 2266       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2267           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2268         // 64-bit
 2269         int offset = ra_->reg2offset(src_first);
 2270         if (masm) {
 2271           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2272 #ifndef PRODUCT
 2273         } else {
 2274           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2275                      Matcher::regName[dst_first],
 2276                      offset);
 2277 #endif
 2278         }
 2279       }
 2280       return 0;
 2281     }
 2282   } else if (src_first_rc == rc_int) {
 2283     // gpr ->
 2284     if (dst_first_rc == rc_stack) {
 2285       // gpr -> mem
 2286       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2287           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2288         // 64-bit
 2289         int offset = ra_->reg2offset(dst_first);
 2290         if (masm) {
 2291           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2292 #ifndef PRODUCT
 2293         } else {
 2294           st->print("movq    [rsp + #%d], %s\t# spill",
 2295                      offset,
 2296                      Matcher::regName[src_first]);
 2297 #endif
 2298         }
 2299       } else {
 2300         // 32-bit
 2301         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2302         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2303         int offset = ra_->reg2offset(dst_first);
 2304         if (masm) {
 2305           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2306 #ifndef PRODUCT
 2307         } else {
 2308           st->print("movl    [rsp + #%d], %s\t# spill",
 2309                      offset,
 2310                      Matcher::regName[src_first]);
 2311 #endif
 2312         }
 2313       }
 2314       return 0;
 2315     } else if (dst_first_rc == rc_int) {
 2316       // gpr -> gpr
 2317       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2318           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2319         // 64-bit
 2320         if (masm) {
 2321           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2322                   as_Register(Matcher::_regEncode[src_first]));
 2323 #ifndef PRODUCT
 2324         } else {
 2325           st->print("movq    %s, %s\t# spill",
 2326                      Matcher::regName[dst_first],
 2327                      Matcher::regName[src_first]);
 2328 #endif
 2329         }
 2330         return 0;
 2331       } else {
 2332         // 32-bit
 2333         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2334         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2335         if (masm) {
 2336           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2337                   as_Register(Matcher::_regEncode[src_first]));
 2338 #ifndef PRODUCT
 2339         } else {
 2340           st->print("movl    %s, %s\t# spill",
 2341                      Matcher::regName[dst_first],
 2342                      Matcher::regName[src_first]);
 2343 #endif
 2344         }
 2345         return 0;
 2346       }
 2347     } else if (dst_first_rc == rc_float) {
 2348       // gpr -> xmm
 2349       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2350           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2351         // 64-bit
 2352         if (masm) {
 2353           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2354 #ifndef PRODUCT
 2355         } else {
 2356           st->print("movdq   %s, %s\t# spill",
 2357                      Matcher::regName[dst_first],
 2358                      Matcher::regName[src_first]);
 2359 #endif
 2360         }
 2361       } else {
 2362         // 32-bit
 2363         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2364         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2365         if (masm) {
 2366           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2367 #ifndef PRODUCT
 2368         } else {
 2369           st->print("movdl   %s, %s\t# spill",
 2370                      Matcher::regName[dst_first],
 2371                      Matcher::regName[src_first]);
 2372 #endif
 2373         }
 2374       }
 2375       return 0;
 2376     } else if (dst_first_rc == rc_kreg) {
 2377       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2378           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2379         // 64-bit
 2380         if (masm) {
 2381           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2388         }
 2389       }
 2390       Unimplemented();
 2391       return 0;
 2392     }
 2393   } else if (src_first_rc == rc_float) {
 2394     // xmm ->
 2395     if (dst_first_rc == rc_stack) {
 2396       // xmm -> mem
 2397       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2398           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2399         // 64-bit
 2400         int offset = ra_->reg2offset(dst_first);
 2401         if (masm) {
 2402           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2403 #ifndef PRODUCT
 2404         } else {
 2405           st->print("movsd   [rsp + #%d], %s\t# spill",
 2406                      offset,
 2407                      Matcher::regName[src_first]);
 2408 #endif
 2409         }
 2410       } else {
 2411         // 32-bit
 2412         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2413         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2414         int offset = ra_->reg2offset(dst_first);
 2415         if (masm) {
 2416           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2417 #ifndef PRODUCT
 2418         } else {
 2419           st->print("movss   [rsp + #%d], %s\t# spill",
 2420                      offset,
 2421                      Matcher::regName[src_first]);
 2422 #endif
 2423         }
 2424       }
 2425       return 0;
 2426     } else if (dst_first_rc == rc_int) {
 2427       // xmm -> gpr
 2428       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2429           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2430         // 64-bit
 2431         if (masm) {
 2432           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2433 #ifndef PRODUCT
 2434         } else {
 2435           st->print("movdq   %s, %s\t# spill",
 2436                      Matcher::regName[dst_first],
 2437                      Matcher::regName[src_first]);
 2438 #endif
 2439         }
 2440       } else {
 2441         // 32-bit
 2442         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2443         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2444         if (masm) {
 2445           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2446 #ifndef PRODUCT
 2447         } else {
 2448           st->print("movdl   %s, %s\t# spill",
 2449                      Matcher::regName[dst_first],
 2450                      Matcher::regName[src_first]);
 2451 #endif
 2452         }
 2453       }
 2454       return 0;
 2455     } else if (dst_first_rc == rc_float) {
 2456       // xmm -> xmm
 2457       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2458           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2459         // 64-bit
 2460         if (masm) {
 2461           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2462 #ifndef PRODUCT
 2463         } else {
 2464           st->print("%s  %s, %s\t# spill",
 2465                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2466                      Matcher::regName[dst_first],
 2467                      Matcher::regName[src_first]);
 2468 #endif
 2469         }
 2470       } else {
 2471         // 32-bit
 2472         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2473         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2474         if (masm) {
 2475           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2476 #ifndef PRODUCT
 2477         } else {
 2478           st->print("%s  %s, %s\t# spill",
 2479                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2480                      Matcher::regName[dst_first],
 2481                      Matcher::regName[src_first]);
 2482 #endif
 2483         }
 2484       }
 2485       return 0;
 2486     } else if (dst_first_rc == rc_kreg) {
 2487       assert(false, "Illegal spilling");
 2488       return 0;
 2489     }
 2490   } else if (src_first_rc == rc_kreg) {
 2491     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2493       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2494           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2495         // 64-bit
 2496         int offset = ra_->reg2offset(dst_first);
 2497         if (masm) {
 2498           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2499 #ifndef PRODUCT
 2500         } else {
          st->print("kmovq   [rsp + #%d], %s\t# spill",
 2502                      offset,
 2503                      Matcher::regName[src_first]);
 2504 #endif
 2505         }
 2506       }
 2507       return 0;
 2508     } else if (dst_first_rc == rc_int) {
 2509       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2510           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2511         // 64-bit
 2512         if (masm) {
 2513           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2514 #ifndef PRODUCT
 2515         } else {
          st->print("kmovq   %s, %s\t# spill",
 2517                      Matcher::regName[dst_first],
 2518                      Matcher::regName[src_first]);
 2519 #endif
 2520         }
 2521       }
 2522       Unimplemented();
 2523       return 0;
 2524     } else if (dst_first_rc == rc_kreg) {
 2525       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2526           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2527         // 64-bit
 2528         if (masm) {
 2529           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2530 #ifndef PRODUCT
 2531         } else {
          st->print("kmovq   %s, %s\t# spill",
 2533                      Matcher::regName[dst_first],
 2534                      Matcher::regName[src_first]);
 2535 #endif
 2536         }
 2537       }
 2538       return 0;
 2539     } else if (dst_first_rc == rc_float) {
 2540       assert(false, "Illegal spill");
 2541       return 0;
 2542     }
 2543   }
 2544 
  assert(false, "unhandled spill copy combination");
 2546   Unimplemented();
 2547   return 0;
 2548 }
 2549 
 2550 #ifndef PRODUCT
 2551 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2552   implementation(nullptr, ra_, false, st);
 2553 }
 2554 #endif
 2555 
 2556 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2557   implementation(masm, ra_, false, nullptr);
 2558 }
 2559 
 2560 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2561   return MachNode::size(ra_);
 2562 }
 2563 
 2564 //=============================================================================
 2565 #ifndef PRODUCT
 2566 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2567 {
 2568   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2569   int reg = ra_->get_reg_first(this);
 2570   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2571             Matcher::regName[reg], offset);
 2572 }
 2573 #endif
 2574 
 2575 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2576 {
 2577   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2578   int reg = ra_->get_encode(this);
 2579 
 2580   __ lea(as_Register(reg), Address(rsp, offset));
 2581 }
 2582 
 2583 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2584 {
 2585   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
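  // Encoding size of lea reg, [rsp + offset]: prefix + opcode (0x8D) + ModRM +
  // SIB + disp8 or disp32. With a one-byte REX prefix this is 5/8 bytes;
  // destinations r16-r31 (encoding > 15) need the two-byte REX2 prefix, giving 6/9.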
 2586   if (ra_->get_encode(this) > 15) {
 2587     return (offset < 0x80) ? 6 : 9; // REX2
 2588   } else {
 2589     return (offset < 0x80) ? 5 : 8; // REX
 2590   }
 2591 }
 2592 
 2593 //=============================================================================
 2594 #ifndef PRODUCT
 2595 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2596 {
 2597   st->print_cr("MachVEPNode");
 2598 }
 2599 #endif
 2600 
 2601 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2602 {
 2603   CodeBuffer* cbuf = masm->code();
 2604   uint insts_size = cbuf->insts_size();
 2605   if (!_verified) {
 2606     __ ic_check(1);
 2607   } else {
 2608     // TODO 8284443 Avoid creation of temporary frame
 2609     if (ra_->C->stub_function() == nullptr) {
 2610       __ verified_entry(ra_->C, 0);
 2611       __ entry_barrier();
 2612       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2613       __ remove_frame(initial_framesize, false);
 2614     }
 2615     // Unpack inline type args passed as oop and then jump to
 2616     // the verified entry point (skipping the unverified entry).
 2617     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2618     // Emit code for verified entry and save increment for stack repair on return
 2619     __ verified_entry(ra_->C, sp_inc);
 2620     if (Compile::current()->output()->in_scratch_emit_size()) {
 2621       Label dummy_verified_entry;
 2622       __ jmp(dummy_verified_entry);
 2623     } else {
 2624       __ jmp(*_verified_entry);
 2625     }
 2626   }
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2629   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2630   nops_cnt &= 0x3; // Do not add nops if code is aligned.
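  // For example, if the entry code above emitted 13 bytes, nops_cnt is
  // (4 - (13 & 0x3)) & 0x3 = 3, padding to the next 4-byte boundary; a size
  // that is already a multiple of 4 gets no padding.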
 2631   if (nops_cnt > 0) {
 2632     __ nop(nops_cnt);
 2633   }
 2634 }
 2635 
 2636 //=============================================================================
 2637 #ifndef PRODUCT
 2638 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2639 {
 2640   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2641   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2642   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2643 }
 2644 #endif
 2645 
 2646 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2647 {
 2648   __ ic_check(InteriorEntryAlignment);
 2649 }
 2650 
 2651 
 2652 //=============================================================================
 2653 
 2654 bool Matcher::supports_vector_calling_convention(void) {
 2655   return EnableVectorSupport;
 2656 }
 2657 
 2658 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2659   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2660 }
 2661 
 2662 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2663   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2664 }
 2665 
 2666 #ifdef ASSERT
 2667 static bool is_ndd_demotable(const MachNode* mdef) {
 2668   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2669 }
 2670 #endif
 2671 
 2672 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2673                                             int oper_index) {
 2674   if (mdef == nullptr) {
 2675     return false;
 2676   }
 2677 
 2678   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2679       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2680     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2681     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2682     return false;
 2683   }
 2684 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any single address component
  // will not result in NDD demotion by the assembler.
 2688   if (mdef->operand_num_edges(oper_index) != 1) {
 2689     return false;
 2690   }
 2691 
  // A demotion candidate must be register-mask compatible with the definition.
 2693   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2694   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2695     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2696     return false;
 2697   }
 2698 
 2699   switch (oper_index) {
  // The first operand of a MachNode matched by an Intel APX NDD selection
  // pattern can share its assigned register with the definition operand if
  // their live ranges do not overlap. In that case the assembler can demote
  // the instruction to a legacy map0/map1 encoding, replacing its 4-byte
  // extended EVEX prefix with a shorter REX/REX2 prefix. Demotion candidates
  // are decorated with a special flag by the instruction selector.
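  // Illustrative example: for an NDD add "add rdst, rsrc1, rsrc2" (EVEX-encoded),
  // if rdst is assigned the same register as rsrc1, the assembler can instead
  // emit the legacy two-operand "add rdst, rsrc2" with a REX/REX2 prefix.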
 2706   case 1:
 2707     return is_ndd_demotable_opr1(mdef);
 2708 
  // The definition operand of a commutative operation can be biased towards
  // its second operand.
 2711   case 2:
 2712     return is_ndd_demotable_opr2(mdef);
 2713 
  // The current scheme selects at most two biasing candidates.
 2715   default:
 2716     assert(false, "unhandled operand index: %s", mdef->Name());
 2717     break;
 2718   }
 2719 
 2720   return false;
 2721 }
 2722 
 2723 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2724   assert(EnableVectorSupport, "sanity");
 2725   int lo = XMM0_num;
 2726   int hi = XMM0b_num;
 2727   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2728   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2729   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2730   return OptoRegPair(hi, lo);
 2731 }
 2732 
 2733 // Is this branch offset short enough that a short branch can be used?
 2734 //
 2735 // NOTE: If the platform does not provide any short branch variants, then
 2736 //       this method should return false for offset 0.
 2737 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 2741   offset -= br_size;
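  // For example, a 2-byte short branch whose target lies 100 bytes past the
  // branch instruction has a displacement of 100 - 2 = 98, which fits in rel8.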
 2742 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2745   if (rule == jmpConUCF2_rule)
 2746     return (-126 <= offset && offset <= 125);
 2747   return (-128 <= offset && offset <= 127);
 2748 }
 2749 
 2750 #ifdef ASSERT
 2751 // Return whether or not this register is ever used as an argument.
 2752 bool Matcher::can_be_java_arg(int reg)
 2753 {
 2754   return
 2755     reg ==  RDI_num || reg == RDI_H_num ||
 2756     reg ==  RSI_num || reg == RSI_H_num ||
 2757     reg ==  RDX_num || reg == RDX_H_num ||
 2758     reg ==  RCX_num || reg == RCX_H_num ||
 2759     reg ==   R8_num || reg ==  R8_H_num ||
 2760     reg ==   R9_num || reg ==  R9_H_num ||
 2761     reg ==  R12_num || reg == R12_H_num ||
 2762     reg == XMM0_num || reg == XMM0b_num ||
 2763     reg == XMM1_num || reg == XMM1b_num ||
 2764     reg == XMM2_num || reg == XMM2b_num ||
 2765     reg == XMM3_num || reg == XMM3b_num ||
 2766     reg == XMM4_num || reg == XMM4b_num ||
 2767     reg == XMM5_num || reg == XMM5b_num ||
 2768     reg == XMM6_num || reg == XMM6b_num ||
 2769     reg == XMM7_num || reg == XMM7b_num;
 2770 }
 2771 #endif
 2772 
 2773 uint Matcher::int_pressure_limit()
 2774 {
 2775   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2776 }
 2777 
 2778 uint Matcher::float_pressure_limit()
 2779 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2782   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2783   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2784   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2785 }
 2786 
 2787 // Register for DIVI projection of divmodI
 2788 const RegMask& Matcher::divI_proj_mask() {
 2789   return INT_RAX_REG_mask();
 2790 }
 2791 
 2792 // Register for MODI projection of divmodI
 2793 const RegMask& Matcher::modI_proj_mask() {
 2794   return INT_RDX_REG_mask();
 2795 }
 2796 
 2797 // Register for DIVL projection of divmodL
 2798 const RegMask& Matcher::divL_proj_mask() {
 2799   return LONG_RAX_REG_mask();
 2800 }
 2801 
 2802 // Register for MODL projection of divmodL
 2803 const RegMask& Matcher::modL_proj_mask() {
 2804   return LONG_RDX_REG_mask();
 2805 }
 2806 
 2807 %}
 2808 
 2809 source_hpp %{
 2810 // Header information of the source block.
 2811 // Method declarations/definitions which are used outside
 2812 // the ad-scope can conveniently be defined here.
 2813 //
 2814 // To keep related declarations/definitions/uses close together,
 2815 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2816 
 2817 #include "runtime/vm_version.hpp"
 2818 
 2819 class NativeJump;
 2820 
 2821 class CallStubImpl {
 2822 
 2823   //--------------------------------------------------------------
 2824   //---<  Used for optimization in Compile::shorten_branches  >---
 2825   //--------------------------------------------------------------
 2826 
 2827  public:
 2828   // Size of call trampoline stub.
 2829   static uint size_call_trampoline() {
 2830     return 0; // no call trampolines on this platform
 2831   }
 2832 
 2833   // number of relocations needed by a call trampoline stub
 2834   static uint reloc_call_trampoline() {
 2835     return 0; // no call trampolines on this platform
 2836   }
 2837 };
 2838 
 2839 class HandlerImpl {
 2840 
 2841  public:
 2842 
 2843   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2844 
 2845   static uint size_deopt_handler() {
 2846     // one call and one jmp.
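    // i.e. a rel32 call (5 bytes) plus a short rel8 jmp (2 bytes); see emit_deopt_handler.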
 2847     return 7;
 2848   }
 2849 };
 2850 
 2851 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2853     case  4: // fall-through
 2854     case  8: // fall-through
 2855     case 16: return Assembler::AVX_128bit;
 2856     case 32: return Assembler::AVX_256bit;
 2857     case 64: return Assembler::AVX_512bit;
 2858 
 2859     default: {
 2860       ShouldNotReachHere();
 2861       return Assembler::AVX_NoVec;
 2862     }
 2863   }
 2864 }
 2865 
 2866 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2867   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2868 }
 2869 
 2870 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2871   uint def_idx = use->operand_index(opnd);
 2872   Node* def = use->in(def_idx);
 2873   return vector_length_encoding(def);
 2874 }
 2875 
 2876 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2877   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2878          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2879 }
 2880 
 2881 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2882   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2883            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2884 }
 2885 
 2886 class Node::PD {
 2887 public:
 2888   enum NodeFlags : uint64_t {
 2889     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2890     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2891     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2892     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2893     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2894     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2895     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2896     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2897     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2898     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2899     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2900     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2901     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2902     _last_flag                = Flag_ndd_demotable_opr2
 2903   };
 2904 };
 2905 
 2906 %} // end source_hpp
 2907 
 2908 source %{
 2909 
 2910 #include "opto/addnode.hpp"
 2911 #include "c2_intelJccErratum_x86.hpp"
 2912 
 2913 void PhaseOutput::pd_perform_mach_node_analysis() {
 2914   if (VM_Version::has_intel_jcc_erratum()) {
 2915     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2916     _buf_sizes._code += extra_padding;
 2917   }
 2918 }
 2919 
 2920 int MachNode::pd_alignment_required() const {
 2921   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2922     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2923     return IntelJccErratum::largest_jcc_size() + 1;
 2924   } else {
 2925     return 1;
 2926   }
 2927 }
 2928 
 2929 int MachNode::compute_padding(int current_offset) const {
 2930   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2931     Compile* C = Compile::current();
 2932     PhaseOutput* output = C->output();
 2933     Block* block = output->block();
 2934     int index = output->index();
 2935     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2936   } else {
 2937     return 0;
 2938   }
 2939 }
 2940 
 2941 // Emit deopt handler code.
 2942 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2943 
 2944   // Note that the code buffer's insts_mark is always relative to insts.
 2945   // That's why we must use the macroassembler to generate a handler.
 2946   address base = __ start_a_stub(size_deopt_handler());
 2947   if (base == nullptr) {
 2948     ciEnv::current()->record_failure("CodeCache is full");
 2949     return 0;  // CodeBuffer::expand failed
 2950   }
 2951   int offset = __ offset();
 2952 
 2953   Label start;
 2954   __ bind(start);
 2955 
 2956   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2957 
 2958   int entry_offset = __ offset();
 2959 
 2960   __ jmp(start);
 2961 
 2962   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2963   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2964          "out of bounds read in post-call NOP check");
 2965   __ end_a_stub();
 2966   return entry_offset;
 2967 }
 2968 
 2969 static Assembler::Width widthForType(BasicType bt) {
 2970   if (bt == T_BYTE) {
 2971     return Assembler::B;
 2972   } else if (bt == T_SHORT) {
 2973     return Assembler::W;
 2974   } else if (bt == T_INT) {
 2975     return Assembler::D;
 2976   } else {
 2977     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2978     return Assembler::Q;
 2979   }
 2980 }
 2981 
 2982 //=============================================================================
 2983 
 2984   // Float masks come from different places depending on platform.
 2985   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2986   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2987   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2988   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2989   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2990   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2991   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2992   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2993   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2994   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2995   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2996   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2997   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2998   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2999   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 3000   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 3001   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 3002   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 3003   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 3004 
 3005 //=============================================================================
 3006 bool Matcher::match_rule_supported(int opcode) {
 3007   if (!has_match_rule(opcode)) {
 3008     return false; // no match rule present
 3009   }
 3010   switch (opcode) {
 3011     case Op_AbsVL:
 3012     case Op_StoreVectorScatter:
 3013       if (UseAVX < 3) {
 3014         return false;
 3015       }
 3016       break;
 3017     case Op_PopCountI:
 3018     case Op_PopCountL:
 3019       if (!UsePopCountInstruction) {
 3020         return false;
 3021       }
 3022       break;
 3023     case Op_PopCountVI:
 3024       if (UseAVX < 2) {
 3025         return false;
 3026       }
 3027       break;
 3028     case Op_CompressV:
 3029     case Op_ExpandV:
 3030     case Op_PopCountVL:
 3031       if (UseAVX < 2) {
 3032         return false;
 3033       }
 3034       break;
 3035     case Op_MulVI:
 3036       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3037         return false;
 3038       }
 3039       break;
 3040     case Op_MulVL:
 3041       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3042         return false;
 3043       }
 3044       break;
 3045     case Op_MulReductionVL:
 3046       if (VM_Version::supports_avx512dq() == false) {
 3047         return false;
 3048       }
 3049       break;
 3050     case Op_AbsVB:
 3051     case Op_AbsVS:
 3052     case Op_AbsVI:
 3053     case Op_AddReductionVI:
 3054     case Op_AndReductionV:
 3055     case Op_OrReductionV:
 3056     case Op_XorReductionV:
 3057       if (UseSSE < 3) { // requires at least SSSE3
 3058         return false;
 3059       }
 3060       break;
 3061     case Op_MaxHF:
 3062     case Op_MinHF:
 3063       if (!VM_Version::supports_avx512vlbw()) {
 3064         return false;
 3065       }  // fallthrough
 3066     case Op_AddHF:
 3067     case Op_DivHF:
 3068     case Op_FmaHF:
 3069     case Op_MulHF:
 3070     case Op_ReinterpretS2HF:
 3071     case Op_ReinterpretHF2S:
 3072     case Op_SubHF:
 3073     case Op_SqrtHF:
 3074       if (!VM_Version::supports_avx512_fp16()) {
 3075         return false;
 3076       }
 3077       break;
 3078     case Op_VectorLoadShuffle:
 3079     case Op_VectorRearrange:
 3080     case Op_MulReductionVI:
 3081       if (UseSSE < 4) { // requires at least SSE4
 3082         return false;
 3083       }
 3084       break;
 3085     case Op_IsInfiniteF:
 3086     case Op_IsInfiniteD:
 3087       if (!VM_Version::supports_avx512dq()) {
 3088         return false;
 3089       }
 3090       break;
 3091     case Op_SqrtVD:
 3092     case Op_SqrtVF:
 3093     case Op_VectorMaskCmp:
 3094     case Op_VectorCastB2X:
 3095     case Op_VectorCastS2X:
 3096     case Op_VectorCastI2X:
 3097     case Op_VectorCastL2X:
 3098     case Op_VectorCastF2X:
 3099     case Op_VectorCastD2X:
 3100     case Op_VectorUCastB2X:
 3101     case Op_VectorUCastS2X:
 3102     case Op_VectorUCastI2X:
 3103     case Op_VectorMaskCast:
 3104       if (UseAVX < 1) { // enabled for AVX only
 3105         return false;
 3106       }
 3107       break;
 3108     case Op_PopulateIndex:
 3109       if (UseAVX < 2) {
 3110         return false;
 3111       }
 3112       break;
 3113     case Op_RoundVF:
 3114       if (UseAVX < 2) { // enabled for AVX2 only
 3115         return false;
 3116       }
 3117       break;
 3118     case Op_RoundVD:
 3119       if (UseAVX < 3) {
 3120         return false;  // enabled for AVX3 only
 3121       }
 3122       break;
 3123     case Op_CompareAndSwapL:
 3124     case Op_CompareAndSwapP:
 3125       break;
 3126     case Op_StrIndexOf:
 3127       if (!UseSSE42Intrinsics) {
 3128         return false;
 3129       }
 3130       break;
 3131     case Op_StrIndexOfChar:
 3132       if (!UseSSE42Intrinsics) {
 3133         return false;
 3134       }
 3135       break;
 3136     case Op_OnSpinWait:
 3137       if (VM_Version::supports_on_spin_wait() == false) {
 3138         return false;
 3139       }
 3140       break;
 3141     case Op_MulVB:
 3142     case Op_LShiftVB:
 3143     case Op_RShiftVB:
 3144     case Op_URShiftVB:
 3145     case Op_VectorInsert:
 3146     case Op_VectorLoadMask:
 3147     case Op_VectorStoreMask:
 3148     case Op_VectorBlend:
 3149       if (UseSSE < 4) {
 3150         return false;
 3151       }
 3152       break;
 3153     case Op_MaxD:
 3154     case Op_MaxF:
 3155     case Op_MinD:
 3156     case Op_MinF:
 3157       if (UseAVX < 1) { // enabled for AVX only
 3158         return false;
 3159       }
 3160       break;
 3161     case Op_CacheWB:
 3162     case Op_CacheWBPreSync:
 3163     case Op_CacheWBPostSync:
 3164       if (!VM_Version::supports_data_cache_line_flush()) {
 3165         return false;
 3166       }
 3167       break;
 3168     case Op_ExtractB:
 3169     case Op_ExtractL:
 3170     case Op_ExtractI:
 3171     case Op_RoundDoubleMode:
 3172       if (UseSSE < 4) {
 3173         return false;
 3174       }
 3175       break;
 3176     case Op_RoundDoubleModeV:
 3177       if (VM_Version::supports_avx() == false) {
 3178         return false; // 128bit vroundpd is not available
 3179       }
 3180       break;
 3181     case Op_LoadVectorGather:
 3182     case Op_LoadVectorGatherMasked:
 3183       if (UseAVX < 2) {
 3184         return false;
 3185       }
 3186       break;
 3187     case Op_FmaF:
 3188     case Op_FmaD:
 3189     case Op_FmaVD:
 3190     case Op_FmaVF:
 3191       if (!UseFMA) {
 3192         return false;
 3193       }
 3194       break;
 3195     case Op_MacroLogicV:
 3196       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3197         return false;
 3198       }
 3199       break;
 3200 
 3201     case Op_VectorCmpMasked:
 3202     case Op_VectorMaskGen:
 3203       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3204         return false;
 3205       }
 3206       break;
 3207     case Op_VectorMaskFirstTrue:
 3208     case Op_VectorMaskLastTrue:
 3209     case Op_VectorMaskTrueCount:
 3210     case Op_VectorMaskToLong:
 3211       if (UseAVX < 1) {
 3212          return false;
 3213       }
 3214       break;
 3215     case Op_RoundF:
 3216     case Op_RoundD:
 3217       break;
 3218     case Op_CopySignD:
 3219     case Op_CopySignF:
 3220       if (UseAVX < 3)  {
 3221         return false;
 3222       }
 3223       if (!VM_Version::supports_avx512vl()) {
 3224         return false;
 3225       }
 3226       break;
 3227     case Op_CompressBits:
 3228     case Op_ExpandBits:
 3229       if (!VM_Version::supports_bmi2()) {
 3230         return false;
 3231       }
 3232       break;
 3233     case Op_CompressM:
 3234       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3235         return false;
 3236       }
 3237       break;
 3238     case Op_ConvF2HF:
 3239     case Op_ConvHF2F:
 3240       if (!VM_Version::supports_float16()) {
 3241         return false;
 3242       }
 3243       break;
 3244     case Op_VectorCastF2HF:
 3245     case Op_VectorCastHF2F:
 3246       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3247         return false;
 3248       }
 3249       break;
 3250   }
 3251   return true;  // Match rules are supported by default.
 3252 }
 3253 
 3254 //------------------------------------------------------------------------
 3255 
 3256 static inline bool is_pop_count_instr_target(BasicType bt) {
 3257   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3258          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3259 }
 3260 
 3261 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3262   return match_rule_supported_vector(opcode, vlen, bt);
 3263 }
 3264 
// Identify extra cases in which we might want to provide match rules for vector nodes
// and other intrinsics guarded with vector length (vlen) and element type (bt).
 3267 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3268   if (!match_rule_supported(opcode)) {
 3269     return false;
 3270   }
 3271   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3272   //   * SSE2 supports 128bit vectors for all types;
 3273   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3274   //   * AVX2 supports 256bit vectors for all types;
 3275   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3276   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3277   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3278   // And MaxVectorSize is taken into account as well.
 3279   if (!vector_size_supported(bt, vlen)) {
 3280     return false;
 3281   }
 3282   // Special cases which require vector length follow:
 3283   //   * implementation limitations
 3284   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3285   //   * 128bit vroundpd instruction is present only in AVX1
 3286   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
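  // For example, a vector of 8 ints: 8 * 4 bytes * 8 bits = 256 bits.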
 3287   switch (opcode) {
 3288     case Op_MaxVHF:
 3289     case Op_MinVHF:
 3290       if (!VM_Version::supports_avx512bw()) {
 3291         return false;
 3292       }
 3293     case Op_AddVHF:
 3294     case Op_DivVHF:
 3295     case Op_FmaVHF:
 3296     case Op_MulVHF:
 3297     case Op_SubVHF:
 3298     case Op_SqrtVHF:
 3299       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3300         return false;
 3301       }
 3302       if (!VM_Version::supports_avx512_fp16()) {
 3303         return false;
 3304       }
 3305       break;
 3306     case Op_AbsVF:
 3307     case Op_NegVF:
 3308       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3309         return false; // 512bit vandps and vxorps are not available
 3310       }
 3311       break;
 3312     case Op_AbsVD:
 3313     case Op_NegVD:
 3314       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3315         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3316       }
 3317       break;
 3318     case Op_RotateRightV:
 3319     case Op_RotateLeftV:
 3320       if (bt != T_INT && bt != T_LONG) {
 3321         return false;
 3322       } // fallthrough
 3323     case Op_MacroLogicV:
 3324       if (!VM_Version::supports_evex() ||
 3325           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3326         return false;
 3327       }
 3328       break;
 3329     case Op_ClearArray:
 3330     case Op_VectorMaskGen:
 3331     case Op_VectorCmpMasked:
 3332       if (!VM_Version::supports_avx512bw()) {
 3333         return false;
 3334       }
 3335       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3336         return false;
 3337       }
 3338       break;
 3339     case Op_LoadVectorMasked:
 3340     case Op_StoreVectorMasked:
 3341       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3342         return false;
 3343       }
 3344       break;
 3345     case Op_UMinV:
 3346     case Op_UMaxV:
 3347       if (UseAVX == 0) {
 3348         return false;
 3349       }
 3350       break;
 3351     case Op_UMinReductionV:
 3352     case Op_UMaxReductionV:
 3353       if (UseAVX == 0) {
 3354         return false;
 3355       }
 3356       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3357         return false;
 3358       }
 3359       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3360         return false;
 3361       }
 3362       break;
 3363     case Op_MaxV:
 3364     case Op_MinV:
 3365       if (UseSSE < 4 && is_integral_type(bt)) {
 3366         return false;
 3367       }
 3368       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3369           // Float/Double intrinsics are enabled for AVX family currently.
 3370           if (UseAVX == 0) {
 3371             return false;
 3372           }
 3373           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3374             return false;
 3375           }
 3376       }
 3377       break;
 3378     case Op_CallLeafVector:
 3379       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3380         return false;
 3381       }
 3382       break;
 3383     case Op_AddReductionVI:
 3384       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3385         return false;
 3386       }
 3387       // fallthrough
 3388     case Op_AndReductionV:
 3389     case Op_OrReductionV:
 3390     case Op_XorReductionV:
 3391       if (is_subword_type(bt) && (UseSSE < 4)) {
 3392         return false;
 3393       }
 3394       break;
 3395     case Op_MinReductionV:
 3396     case Op_MaxReductionV:
 3397       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3398         return false;
 3399       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3400         return false;
 3401       }
 3402       // Float/Double intrinsics enabled for AVX family.
 3403       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3404         return false;
 3405       }
 3406       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3407         return false;
 3408       }
 3409       break;
 3410     case Op_VectorBlend:
 3411       if (UseAVX == 0 && size_in_bits < 128) {
 3412         return false;
 3413       }
 3414       break;
 3415     case Op_VectorTest:
 3416       if (UseSSE < 4) {
 3417         return false; // Implementation limitation
 3418       } else if (size_in_bits < 32) {
 3419         return false; // Implementation limitation
 3420       }
 3421       break;
 3422     case Op_VectorLoadShuffle:
 3423     case Op_VectorRearrange:
      if (vlen == 2) {
 3425         return false; // Implementation limitation due to how shuffle is loaded
 3426       } else if (size_in_bits == 256 && UseAVX < 2) {
 3427         return false; // Implementation limitation
 3428       }
 3429       break;
 3430     case Op_VectorLoadMask:
 3431     case Op_VectorMaskCast:
 3432       if (size_in_bits == 256 && UseAVX < 2) {
 3433         return false; // Implementation limitation
 3434       }
 3435       // fallthrough
 3436     case Op_VectorStoreMask:
 3437       if (vlen == 2) {
 3438         return false; // Implementation limitation
 3439       }
 3440       break;
 3441     case Op_PopulateIndex:
 3442       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3443         return false;
 3444       }
 3445       break;
 3446     case Op_VectorCastB2X:
 3447     case Op_VectorCastS2X:
 3448     case Op_VectorCastI2X:
 3449       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3450         return false;
 3451       }
 3452       break;
 3453     case Op_VectorCastL2X:
 3454       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3455         return false;
 3456       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3457         return false;
 3458       }
 3459       break;
 3460     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3464         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3465         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3466           return false;
 3467         }
 3468       }
 3469       // fallthrough
 3470     case Op_VectorCastD2X:
 3471       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3472         return false;
 3473       }
 3474       break;
 3475     case Op_VectorCastF2HF:
 3476     case Op_VectorCastHF2F:
 3477       if (!VM_Version::supports_f16c() &&
 3478          ((!VM_Version::supports_evex() ||
 3479          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3480         return false;
 3481       }
 3482       break;
 3483     case Op_RoundVD:
 3484       if (!VM_Version::supports_avx512dq()) {
 3485         return false;
 3486       }
 3487       break;
 3488     case Op_MulReductionVI:
 3489       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3490         return false;
 3491       }
 3492       break;
 3493     case Op_LoadVectorGatherMasked:
 3494       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3495         return false;
 3496       }
 3497       if (is_subword_type(bt) &&
 3498          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3499           (size_in_bits < 64)                                      ||
 3500           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3501         return false;
 3502       }
 3503       break;
 3504     case Op_StoreVectorScatterMasked:
 3505     case Op_StoreVectorScatter:
 3506       if (is_subword_type(bt)) {
 3507         return false;
 3508       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3509         return false;
 3510       }
 3511       // fallthrough
 3512     case Op_LoadVectorGather:
 3513       if (!is_subword_type(bt) && size_in_bits == 64) {
 3514         return false;
 3515       }
 3516       if (is_subword_type(bt) && size_in_bits < 64) {
 3517         return false;
 3518       }
 3519       break;
 3520     case Op_SaturatingAddV:
 3521     case Op_SaturatingSubV:
 3522       if (UseAVX < 1) {
 3523         return false; // Implementation limitation
 3524       }
 3525       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3526         return false;
 3527       }
 3528       break;
 3529     case Op_SelectFromTwoVector:
 3530        if (size_in_bits < 128) {
 3531          return false;
 3532        }
 3533        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3534          return false;
 3535        }
 3536        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3537          return false;
 3538        }
 3539        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3540          return false;
 3541        }
 3542        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3543          return false;
 3544        }
 3545        break;
 3546     case Op_MaskAll:
 3547       if (!VM_Version::supports_evex()) {
 3548         return false;
 3549       }
 3550       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3551         return false;
 3552       }
 3553       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3554         return false;
 3555       }
 3556       break;
 3557     case Op_VectorMaskCmp:
 3558       if (vlen < 2 || size_in_bits < 32) {
 3559         return false;
 3560       }
 3561       break;
 3562     case Op_CompressM:
 3563       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3564         return false;
 3565       }
 3566       break;
 3567     case Op_CompressV:
 3568     case Op_ExpandV:
 3569       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3570         return false;
 3571       }
      if (size_in_bits < 128) {
 3573         return false;
 3574       }
 3575     case Op_VectorLongToMask:
 3576       if (UseAVX < 1) {
 3577         return false;
 3578       }
 3579       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3580         return false;
 3581       }
 3582       break;
 3583     case Op_SignumVD:
 3584     case Op_SignumVF:
 3585       if (UseAVX < 1) {
 3586         return false;
 3587       }
 3588       break;
 3589     case Op_PopCountVI:
 3590     case Op_PopCountVL: {
 3591         if (!is_pop_count_instr_target(bt) &&
 3592             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3593           return false;
 3594         }
 3595       }
 3596       break;
 3597     case Op_ReverseV:
 3598     case Op_ReverseBytesV:
 3599       if (UseAVX < 2) {
 3600         return false;
 3601       }
 3602       break;
 3603     case Op_CountTrailingZerosV:
 3604     case Op_CountLeadingZerosV:
 3605       if (UseAVX < 2) {
 3606         return false;
 3607       }
 3608       break;
 3609   }
  return true;  // Match rules are supported by default.
 3611 }
 3612 
 3613 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a pattern
  // based on the IR opcode. Most of the unary/binary/ternary masked operations share
  // the IR nodes of their non-masked counterparts, with the mask edge being the
  // differentiator. This routine does a strict check on the existence of masked
  // operation patterns by returning false for all other opcodes apart from the ones
  // whose masked instruction patterns are defined in this file.
 3620   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3621     return false;
 3622   }
 3623 
 3624   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3625   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3626     return false;
 3627   }
  switch (opcode) {
 3629     // Unary masked operations
 3630     case Op_AbsVB:
 3631     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
 3633         return false;  // Implementation limitation
 3634       }
 3635     case Op_AbsVI:
 3636     case Op_AbsVL:
 3637       return true;
 3638 
 3639     // Ternary masked operations
 3640     case Op_FmaVF:
 3641     case Op_FmaVD:
 3642       return true;
 3643 
 3644     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3646         return false;
 3647       }
 3648       return true;
 3649 
 3650     // Binary masked operations
 3651     case Op_AddVB:
 3652     case Op_AddVS:
 3653     case Op_SubVB:
 3654     case Op_SubVS:
 3655     case Op_MulVS:
 3656     case Op_LShiftVS:
 3657     case Op_RShiftVS:
 3658     case Op_URShiftVS:
 3659       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3660       if (!VM_Version::supports_avx512bw()) {
 3661         return false;  // Implementation limitation
 3662       }
 3663       return true;
 3664 
 3665     case Op_MulVL:
 3666       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3667       if (!VM_Version::supports_avx512dq()) {
 3668         return false;  // Implementation limitation
 3669       }
 3670       return true;
 3671 
 3672     case Op_AndV:
 3673     case Op_OrV:
 3674     case Op_XorV:
 3675     case Op_RotateRightV:
 3676     case Op_RotateLeftV:
 3677       if (bt != T_INT && bt != T_LONG) {
 3678         return false; // Implementation limitation
 3679       }
 3680       return true;
 3681 
 3682     case Op_VectorLoadMask:
 3683       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3684       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3685         return false;
 3686       }
 3687       return true;
 3688 
 3689     case Op_AddVI:
 3690     case Op_AddVL:
 3691     case Op_AddVF:
 3692     case Op_AddVD:
 3693     case Op_SubVI:
 3694     case Op_SubVL:
 3695     case Op_SubVF:
 3696     case Op_SubVD:
 3697     case Op_MulVI:
 3698     case Op_MulVF:
 3699     case Op_MulVD:
 3700     case Op_DivVF:
 3701     case Op_DivVD:
 3702     case Op_SqrtVF:
 3703     case Op_SqrtVD:
 3704     case Op_LShiftVI:
 3705     case Op_LShiftVL:
 3706     case Op_RShiftVI:
 3707     case Op_RShiftVL:
 3708     case Op_URShiftVI:
 3709     case Op_URShiftVL:
 3710     case Op_LoadVectorMasked:
 3711     case Op_StoreVectorMasked:
 3712     case Op_LoadVectorGatherMasked:
 3713     case Op_StoreVectorScatterMasked:
 3714       return true;
 3715 
 3716     case Op_UMinV:
 3717     case Op_UMaxV:
 3718       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3719         return false;
 3720       } // fallthrough
 3721     case Op_MaxV:
 3722     case Op_MinV:
 3723       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3724         return false; // Implementation limitation
 3725       }
 3726       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3727         return false; // Implementation limitation
 3728       }
 3729       return true;
 3730     case Op_SaturatingAddV:
 3731     case Op_SaturatingSubV:
 3732       if (!is_subword_type(bt)) {
 3733         return false;
 3734       }
 3735       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3736         return false; // Implementation limitation
 3737       }
 3738       return true;
 3739 
 3740     case Op_VectorMaskCmp:
 3741       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3742         return false; // Implementation limitation
 3743       }
 3744       return true;
 3745 
 3746     case Op_VectorRearrange:
 3747       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3748         return false; // Implementation limitation
 3749       }
 3750       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3751         return false; // Implementation limitation
 3752       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3753         return false; // Implementation limitation
 3754       }
 3755       return true;
 3756 
 3757     // Binary Logical operations
 3758     case Op_AndVMask:
 3759     case Op_OrVMask:
 3760     case Op_XorVMask:
 3761       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3762         return false; // Implementation limitation
 3763       }
 3764       return true;
 3765 
 3766     case Op_PopCountVI:
 3767     case Op_PopCountVL:
 3768       if (!is_pop_count_instr_target(bt)) {
 3769         return false;
 3770       }
 3771       return true;
 3772 
 3773     case Op_MaskAll:
 3774       return true;
 3775 
 3776     case Op_CountLeadingZerosV:
 3777       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3778         return true;
 3779       }
 3780     default:
 3781       return false;
 3782   }
 3783 }
 3784 
 3785 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3786   return false;
 3787 }
 3788 
 3789 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3790 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3791   switch (elem_bt) {
 3792     case T_BYTE:  return false;
 3793     case T_SHORT: return !VM_Version::supports_avx512bw();
 3794     case T_INT:   return !VM_Version::supports_avx();
 3795     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3796     default:
 3797       ShouldNotReachHere();
 3798       return false;
 3799   }
 3800 }
 3801 
 3802 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3803   // Prefer predicate if the mask type is "TypeVectMask".
 3804   return vt->isa_vectmask() != nullptr;
 3805 }
 3806 
 3807 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3808   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3809   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3810   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3811       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3812     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3813     return new legVecZOper();
 3814   }
 3815   if (legacy) {
 3816     switch (ideal_reg) {
 3817       case Op_VecS: return new legVecSOper();
 3818       case Op_VecD: return new legVecDOper();
 3819       case Op_VecX: return new legVecXOper();
 3820       case Op_VecY: return new legVecYOper();
 3821       case Op_VecZ: return new legVecZOper();
 3822     }
 3823   } else {
 3824     switch (ideal_reg) {
 3825       case Op_VecS: return new vecSOper();
 3826       case Op_VecD: return new vecDOper();
 3827       case Op_VecX: return new vecXOper();
 3828       case Op_VecY: return new vecYOper();
 3829       case Op_VecZ: return new vecZOper();
 3830     }
 3831   }
 3832   ShouldNotReachHere();
 3833   return nullptr;
 3834 }
 3835 
 3836 bool Matcher::is_reg2reg_move(MachNode* m) {
 3837   switch (m->rule()) {
 3838     case MoveVec2Leg_rule:
 3839     case MoveLeg2Vec_rule:
 3840     case MoveF2VL_rule:
 3841     case MoveF2LEG_rule:
 3842     case MoveVL2F_rule:
 3843     case MoveLEG2F_rule:
 3844     case MoveD2VL_rule:
 3845     case MoveD2LEG_rule:
 3846     case MoveVL2D_rule:
 3847     case MoveLEG2D_rule:
 3848       return true;
 3849     default:
 3850       return false;
 3851   }
 3852 }
 3853 
 3854 bool Matcher::is_generic_vector(MachOper* opnd) {
 3855   switch (opnd->opcode()) {
 3856     case VEC:
 3857     case LEGVEC:
 3858       return true;
 3859     default:
 3860       return false;
 3861   }
 3862 }
 3863 
 3864 //------------------------------------------------------------------------
 3865 
 3866 const RegMask* Matcher::predicate_reg_mask(void) {
 3867   return &_VECTMASK_REG_mask;
 3868 }
 3869 
 3870 // Max vector size in bytes. 0 if not supported.
 3871 int Matcher::vector_width_in_bytes(BasicType bt) {
 3872   assert(is_java_primitive(bt), "only primitive type vectors");
 3873   // SSE2 supports 128bit vectors for all types.
 3874   // AVX2 supports 256bit vectors for all types.
 3875   // EVEX (AVX-512) supports 512bit vectors for all types.
 3876   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3877   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3878   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3879     size = (UseAVX > 2) ? 64 : 32;
 3880   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3881     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3882   // Use flag to limit vector size.
 3883   size = MIN2(size,(int)MaxVectorSize);
 3884   // Minimum 2 values in vector (or 4 for bytes).
 3885   switch (bt) {
 3886   case T_DOUBLE:
 3887   case T_LONG:
 3888     if (size < 16) return 0;
 3889     break;
 3890   case T_FLOAT:
 3891   case T_INT:
 3892     if (size < 8) return 0;
 3893     break;
 3894   case T_BOOLEAN:
 3895     if (size < 4) return 0;
 3896     break;
 3897   case T_CHAR:
 3898     if (size < 4) return 0;
 3899     break;
 3900   case T_BYTE:
 3901     if (size < 4) return 0;
 3902     break;
 3903   case T_SHORT:
 3904     if (size < 4) return 0;
 3905     break;
 3906   default:
 3907     ShouldNotReachHere();
 3908   }
 3909   return size;
 3910 }
 3911 
 3912 // Limits on vector size (number of elements) loaded into vector.
 3913 int Matcher::max_vector_size(const BasicType bt) {
 3914   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3915 }
 3916 int Matcher::min_vector_size(const BasicType bt) {
 3917   int max_size = max_vector_size(bt);
 3918   // Min size which can be loaded into vector is 4 bytes.
 3919   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3920   // Support for calling svml double64 vectors
 3921   if (bt == T_DOUBLE) {
 3922     size = 1;
 3923   }
 3924   return MIN2(size,max_size);
 3925 }
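      // Worked example (illustrative, assuming UseAVX == 2 and MaxVectorSize == 32):
      // vector_width_in_bytes(T_INT) evaluates to (1 << 2) * 8 == 32 bytes, so
      // max_vector_size(T_INT) == 32 / 4 == 8 elements and min_vector_size(T_INT) == 2.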
 3926 
 3927 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3928   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3929   // by default on Cascade Lake
 3930   if (VM_Version::is_default_intel_cascade_lake()) {
 3931     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3932   }
 3933   return Matcher::max_vector_size(bt);
 3934 }
 3935 
 3936 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3937   return -1;
 3938 }
 3939 
 3940 // Vector ideal reg corresponding to specified size in bytes
 3941 uint Matcher::vector_ideal_reg(int size) {
 3942   assert(MaxVectorSize >= size, "");
 3943   switch(size) {
 3944     case  4: return Op_VecS;
 3945     case  8: return Op_VecD;
 3946     case 16: return Op_VecX;
 3947     case 32: return Op_VecY;
 3948     case 64: return Op_VecZ;
 3949   }
 3950   ShouldNotReachHere();
 3951   return 0;
 3952 }
 3953 
 3954 // Check for shift by small constant as well
 3955 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3956   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3957       shift->in(2)->get_int() <= 3 &&
 3958       // Are there other uses besides address expressions?
 3959       !matcher->is_visited(shift)) {
 3960     address_visited.set(shift->_idx); // Flag as address_visited
 3961     mstack.push(shift->in(2), Matcher::Visit);
 3962     Node *conv = shift->in(1);
 3963     // Allow the Matcher to match the rule which bypasses the
 3964     // ConvI2L operation for an array index on LP64
 3965     // if the index value is positive.
 3966     if (conv->Opcode() == Op_ConvI2L &&
 3967         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3968         // Are there other uses besides address expressions?
 3969         !matcher->is_visited(conv)) {
 3970       address_visited.set(conv->_idx); // Flag as address_visited
 3971       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3972     } else {
 3973       mstack.push(conv, Matcher::Pre_Visit);
 3974     }
 3975     return true;
 3976   }
 3977   return false;
 3978 }
 3979 
 3980 // This function identifies sub-graphs in which a 'load' node is
 3981 // input to two different nodes, and such that it can be matched
 3982 // with BMI instructions like blsi, blsr, etc.
 3983 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3984 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3985 // refers to the same node.
 3986 //
 3987 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3988 // This is a temporary solution until we make DAGs expressible in ADL.
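      // Illustrative mapping of the BMI1 idioms recognized below (see is_bmi_pattern);
      // 'load' stands for the same LoadI/LoadL node appearing in both positions:
      //   x & (0 - x)  ->  (AndI (SubI 0 load) load)   ->  blsi
      //   x & (x - 1)  ->  (AndI (AddI load -1) load)  ->  blsr
      //   x ^ (x - 1)  ->  (XorI (AddI load -1) load)  ->  blsmsk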
 3989 template<typename ConType>
 3990 class FusedPatternMatcher {
 3991   Node* _op1_node;
 3992   Node* _mop_node;
 3993   int _con_op;
 3994 
 3995   static int match_next(Node* n, int next_op, int next_op_idx) {
 3996     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3997       return -1;
 3998     }
 3999 
 4000     if (next_op_idx == -1) { // n is commutative, try rotations
 4001       if (n->in(1)->Opcode() == next_op) {
 4002         return 1;
 4003       } else if (n->in(2)->Opcode() == next_op) {
 4004         return 2;
 4005       }
 4006     } else {
 4007       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 4008       if (n->in(next_op_idx)->Opcode() == next_op) {
 4009         return next_op_idx;
 4010       }
 4011     }
 4012     return -1;
 4013   }
 4014 
 4015  public:
 4016   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4017     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4018 
 4019   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4020              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4021              typename ConType::NativeType con_value) {
 4022     if (_op1_node->Opcode() != op1) {
 4023       return false;
 4024     }
 4025     if (_mop_node->outcnt() > 2) {
 4026       return false;
 4027     }
 4028     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4029     if (op1_op2_idx == -1) {
 4030       return false;
 4031     }
 4032     // Memory operation must be the other edge
 4033     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4034 
 4035     // Check that the mop node is really what we want
 4036     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4037       Node* op2_node = _op1_node->in(op1_op2_idx);
 4038       if (op2_node->outcnt() > 1) {
 4039         return false;
 4040       }
 4041       assert(op2_node->Opcode() == op2, "Should be");
 4042       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4043       if (op2_con_idx == -1) {
 4044         return false;
 4045       }
 4046       // Memory operation must be the other edge
 4047       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4048       // Check that the memory operation is the same node
 4049       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4050         // Now check the constant
 4051         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4052         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4053           return true;
 4054         }
 4055       }
 4056     }
 4057     return false;
 4058   }
 4059 };
 4060 
 4061 static bool is_bmi_pattern(Node* n, Node* m) {
 4062   assert(UseBMI1Instructions, "sanity");
 4063   if (n != nullptr && m != nullptr) {
 4064     if (m->Opcode() == Op_LoadI) {
 4065       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4066       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4067              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4068              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4069     } else if (m->Opcode() == Op_LoadL) {
 4070       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4071       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4072              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4073              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4074     }
 4075   }
 4076   return false;
 4077 }
 4078 
 4079 // Should the matcher clone input 'm' of node 'n'?
 4080 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4081   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4082   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4083     mstack.push(m, Visit);
 4084     return true;
 4085   }
 4086   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4087     mstack.push(m, Visit);           // m = ShiftCntV
 4088     return true;
 4089   }
 4090   if (is_encode_and_store_pattern(n, m)) {
 4091     mstack.push(m, Visit);
 4092     return true;
 4093   }
 4094   return false;
 4095 }
 4096 
 4097 // Should the Matcher clone shifts on addressing modes, expecting them
 4098 // to be subsumed into complex addressing expressions or compute them
 4099 // into registers?
 4100 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4101   Node *off = m->in(AddPNode::Offset);
 4102   if (off->is_Con()) {
 4103     address_visited.test_set(m->_idx); // Flag as address_visited
 4104     Node *adr = m->in(AddPNode::Address);
 4105 
 4106     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4107     // AtomicAdd is not an addressing expression.
 4108     // Cheap to find it by looking for screwy base.
 4109     if (adr->is_AddP() &&
 4110         !adr->in(AddPNode::Base)->is_top() &&
 4111         !adr->in(AddPNode::Offset)->is_Con() &&
 4112         off->get_long() == (int) (off->get_long()) && // immL32
 4113         // Are there other uses besides address expressions?
 4114         !is_visited(adr)) {
 4115       address_visited.set(adr->_idx); // Flag as address_visited
 4116       Node *shift = adr->in(AddPNode::Offset);
 4117       if (!clone_shift(shift, this, mstack, address_visited)) {
 4118         mstack.push(shift, Pre_Visit);
 4119       }
 4120       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4121       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4122     } else {
 4123       mstack.push(adr, Pre_Visit);
 4124     }
 4125 
 4126     // Clone X+offset as it also folds into most addressing expressions
 4127     mstack.push(off, Visit);
 4128     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4129     return true;
 4130   } else if (clone_shift(off, this, mstack, address_visited)) {
 4131     address_visited.test_set(m->_idx); // Flag as address_visited
 4132     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4133     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4134     return true;
 4135   }
 4136   return false;
 4137 }
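      // Illustrative example (not a rule in this file): for an array access a[i], an
      // address graph of the shape (AddP base (AddP base (LShiftL (ConvI2L i) 2)) 16)
      // is cloned by the code above so it can be subsumed into the single addressing
      // mode [base + i*4 + 16]; the shift constant (<= 3) becomes the SIB scale.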
 4138 
 4139 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4140   switch (bt) {
 4141     case BoolTest::eq:
 4142       return Assembler::eq;
 4143     case BoolTest::ne:
 4144       return Assembler::neq;
 4145     case BoolTest::le:
 4146     case BoolTest::ule:
 4147       return Assembler::le;
 4148     case BoolTest::ge:
 4149     case BoolTest::uge:
 4150       return Assembler::nlt;
 4151     case BoolTest::lt:
 4152     case BoolTest::ult:
 4153       return Assembler::lt;
 4154     case BoolTest::gt:
 4155     case BoolTest::ugt:
 4156       return Assembler::nle;
 4157     default : ShouldNotReachHere(); return Assembler::_false;
 4158   }
 4159 }
 4160 
 4161 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4162   switch (bt) {
 4163   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4164   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4165   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4166   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4167   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4168   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4169   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4170   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4171   }
 4172 }
 4173 
 4174 // Helper methods for MachSpillCopyNode::implementation().
 4175 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4176                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4177   assert(ireg == Op_VecS || // 32bit vector
 4178          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4179           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4180          "no non-adjacent vector moves" );
 4181   if (masm) {
 4182     switch (ireg) {
 4183     case Op_VecS: // copy whole register
 4184     case Op_VecD:
 4185     case Op_VecX:
 4186       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4187         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4188       } else {
 4189         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4190      }
 4191       break;
 4192     case Op_VecY:
 4193       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4194         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4195       } else {
 4196         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4197      }
 4198       break;
 4199     case Op_VecZ:
 4200       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4201       break;
 4202     default:
 4203       ShouldNotReachHere();
 4204     }
 4205 #ifndef PRODUCT
 4206   } else {
 4207     switch (ireg) {
 4208     case Op_VecS:
 4209     case Op_VecD:
 4210     case Op_VecX:
 4211       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4212       break;
 4213     case Op_VecY:
 4214     case Op_VecZ:
 4215       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4216       break;
 4217     default:
 4218       ShouldNotReachHere();
 4219     }
 4220 #endif
 4221   }
 4222 }
 4223 
 4224 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4225                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4226   if (masm) {
 4227     if (is_load) {
 4228       switch (ireg) {
 4229       case Op_VecS:
 4230         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4231         break;
 4232       case Op_VecD:
 4233         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4234         break;
 4235       case Op_VecX:
 4236         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4237           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4238         } else {
 4239           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4240           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4241         }
 4242         break;
 4243       case Op_VecY:
 4244         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4245           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4246         } else {
 4247           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4248           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4249         }
 4250         break;
 4251       case Op_VecZ:
 4252         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4253         break;
 4254       default:
 4255         ShouldNotReachHere();
 4256       }
 4257     } else { // store
 4258       switch (ireg) {
 4259       case Op_VecS:
 4260         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4261         break;
 4262       case Op_VecD:
 4263         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4264         break;
 4265       case Op_VecX:
 4266         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4267           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4268         }
 4269         else {
 4270           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4271         }
 4272         break;
 4273       case Op_VecY:
 4274         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4275           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4276         }
 4277         else {
 4278           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4279         }
 4280         break;
 4281       case Op_VecZ:
 4282         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4283         break;
 4284       default:
 4285         ShouldNotReachHere();
 4286       }
 4287     }
 4288 #ifndef PRODUCT
 4289   } else {
 4290     if (is_load) {
 4291       switch (ireg) {
 4292       case Op_VecS:
 4293         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4294         break;
 4295       case Op_VecD:
 4296         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4297         break;
 4298        case Op_VecX:
 4299         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4300         break;
 4301       case Op_VecY:
 4302       case Op_VecZ:
 4303         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4304         break;
 4305       default:
 4306         ShouldNotReachHere();
 4307       }
 4308     } else { // store
 4309       switch (ireg) {
 4310       case Op_VecS:
 4311         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4312         break;
 4313       case Op_VecD:
 4314         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4315         break;
 4316        case Op_VecX:
 4317         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4318         break;
 4319       case Op_VecY:
 4320       case Op_VecZ:
 4321         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4322         break;
 4323       default:
 4324         ShouldNotReachHere();
 4325       }
 4326     }
 4327 #endif
 4328   }
 4329 }
 4330 
 4331 template <class T>
 4332 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4333   int size = type2aelembytes(bt) * len;
 4334   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4335   for (int i = 0; i < len; i++) {
 4336     int offset = i * type2aelembytes(bt);
 4337     switch (bt) {
 4338       case T_BYTE: val->at(i) = con; break;
 4339       case T_SHORT: {
 4340         jshort c = con;
 4341         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4342         break;
 4343       }
 4344       case T_INT: {
 4345         jint c = con;
 4346         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4347         break;
 4348       }
 4349       case T_LONG: {
 4350         jlong c = con;
 4351         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4352         break;
 4353       }
 4354       case T_FLOAT: {
 4355         jfloat c = con;
 4356         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4357         break;
 4358       }
 4359       case T_DOUBLE: {
 4360         jdouble c = con;
 4361         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4362         break;
 4363       }
 4364       default: assert(false, "%s", type2name(bt));
 4365     }
 4366   }
 4367   return val;
 4368 }
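      // Illustrative use with hypothetical values: vreplicate_imm(T_INT, 0x01020304, 4)
      // returns a 16-byte array holding the little-endian pattern 04 03 02 01 repeated
      // four times, i.e. the byte image of the replicated vector constant.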
 4369 
 4370 static inline jlong high_bit_set(BasicType bt) {
 4371   switch (bt) {
 4372     case T_BYTE:  return 0x8080808080808080;
 4373     case T_SHORT: return 0x8000800080008000;
 4374     case T_INT:   return 0x8000000080000000;
 4375     case T_LONG:  return 0x8000000000000000;
 4376     default:
 4377       ShouldNotReachHere();
 4378       return 0;
 4379   }
 4380 }
 4381 
 4382 #ifndef PRODUCT
 4383   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4384     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4385   }
 4386 #endif
 4387 
 4388   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4389     __ nop(_count);
 4390   }
 4391 
 4392   uint MachNopNode::size(PhaseRegAlloc*) const {
 4393     return _count;
 4394   }
 4395 
 4396 #ifndef PRODUCT
 4397   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4398     st->print("# breakpoint");
 4399   }
 4400 #endif
 4401 
 4402   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4403     __ int3();
 4404   }
 4405 
 4406   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4407     return MachNode::size(ra_);
 4408   }
 4409 
 4410 %}
 4411 
 4412 //----------ENCODING BLOCK-----------------------------------------------------
 4413 // This block specifies the encoding classes used by the compiler to
 4414 // output byte streams.  Encoding classes are parameterized macros
 4415 // used by Machine Instruction Nodes in order to generate the bit
 4416 // encoding of the instruction.  Operands specify their base encoding
 4417 // interface with the interface keyword.  Four interfaces are
 4418 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
 4419 // COND_INTER.  REG_INTER causes an operand to generate a function
 4420 // which returns its register number when queried.  CONST_INTER causes
 4421 // an operand to generate a function which returns the value of the
 4422 // constant when queried.  MEMORY_INTER causes an operand to generate
 4423 // four functions which return the Base Register, the Index Register,
 4424 // the Scale Value, and the Offset Value of the operand when queried.
 4425 // COND_INTER causes an operand to generate six functions which return
 4426 // the encoding code (ie - encoding bits for the instruction)
 4427 // associated with each basic boolean condition for a conditional
 4428 // instruction.
 4429 //
 4430 // Instructions specify two basic values for encoding.  Again, a
 4431 // function is available to check if the constant displacement is an
 4432 // oop. They use the ins_encode keyword to specify their encoding
 4433 // classes (which must be a sequence of enc_class names, and their
 4434 // parameters, specified in the encoding block), and they use the
 4435 // opcode keyword to specify, in order, their primary, secondary, and
 4436 // tertiary opcode.  Only the opcode sections which a particular
 4437 // instruction needs for encoding need to be specified.
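      // For illustration only (a hedged sketch, not a definition used here): the
      // simplest CONST_INTER operand is immI(), defined later in this file, which
      // just matches ConI and declares interface(CONST_INTER); a MEMORY_INTER
      // operand additionally names its base, index, scale and offset.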
 4438 encode %{
 4439   enc_class cdql_enc(no_rax_rdx_RegI div)
 4440   %{
 4441     // Full implementation of Java idiv and irem; checks for
 4442     // special case as described in JVM spec., p.243 & p.271.
 4443     //
 4444     //         normal case                           special case
 4445     //
 4446     // input : rax: dividend                         min_int
 4447     //         reg: divisor                          -1
 4448     //
 4449     // output: rax: quotient  (= rax idiv reg)       min_int
 4450     //         rdx: remainder (= rax irem reg)       0
 4451     //
 4452     //  Code sequence:
 4453     //
 4454     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4455     //    5:   75 07/08                jne    e <normal>
 4456     //    7:   33 d2                   xor    %edx,%edx
 4457     //  [div >= 8 -> offset + 1]
 4458     //  [REX_B]
 4459     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4460     //    c:   74 03/04                je     11 <done>
 4461     // 000000000000000e <normal>:
 4462     //    e:   99                      cltd
 4463     //  [div >= 8 -> offset + 1]
 4464     //  [REX_B]
 4465     //    f:   f7 f9                   idiv   $div
 4466     // 0000000000000011 <done>:
 4467     Label normal;
 4468     Label done;
 4469 
 4470     // cmp    $0x80000000,%eax
 4471     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4472 
 4473     // jne    e <normal>
 4474     __ jccb(Assembler::notEqual, normal);
 4475 
 4476     // xor    %edx,%edx
 4477     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4478 
 4479     // cmp    $0xffffffffffffffff,$div
 4480     __ cmpl($div$$Register, -1);
 4481 
 4482     // je     11 <done>
 4483     __ jccb(Assembler::equal, done);
 4484 
 4485     // <normal>
 4486     // cltd
 4487     __ bind(normal);
 4488     __ cdql();
 4489 
 4490     // idivl
 4491     // <done>
 4492     __ idivl($div$$Register);
 4493     __ bind(done);
 4494   %}
 4495 
 4496   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4497   %{
 4498     // Full implementation of Java ldiv and lrem; checks for
 4499     // special case as described in JVM spec., p.243 & p.271.
 4500     //
 4501     //         normal case                           special case
 4502     //
 4503     // input : rax: dividend                         min_long
 4504     //         reg: divisor                          -1
 4505     //
 4506     // output: rax: quotient  (= rax idiv reg)       min_long
 4507     //         rdx: remainder (= rax irem reg)       0
 4508     //
 4509     //  Code sequence:
 4510     //
 4511     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4512     //    7:   00 00 80
 4513     //    a:   48 39 d0                cmp    %rdx,%rax
 4514     //    d:   75 08                   jne    17 <normal>
 4515     //    f:   33 d2                   xor    %edx,%edx
 4516     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4517     //   15:   74 05                   je     1c <done>
 4518     // 0000000000000017 <normal>:
 4519     //   17:   48 99                   cqto
 4520     //   19:   48 f7 f9                idiv   $div
 4521     // 000000000000001c <done>:
 4522     Label normal;
 4523     Label done;
 4524 
 4525     // mov    $0x8000000000000000,%rdx
 4526     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4527 
 4528     // cmp    %rdx,%rax
 4529     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4530 
 4531     // jne    17 <normal>
 4532     __ jccb(Assembler::notEqual, normal);
 4533 
 4534     // xor    %edx,%edx
 4535     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4536 
 4537     // cmp    $0xffffffffffffffff,$div
 4538     __ cmpq($div$$Register, -1);
 4539 
 4540     // je     1c <done>
 4541     __ jccb(Assembler::equal, done);
 4542 
 4543     // <normal>
 4544     // cqto
 4545     __ bind(normal);
 4546     __ cdqq();
 4547 
 4548     // idivq
 4549     // <done>
 4550     __ idivq($div$$Register);
 4551     __ bind(done);
 4552   %}
 4553 
 4554   enc_class clear_avx %{
 4555     DEBUG_ONLY(int off0 = __ offset());
 4556     if (generate_vzeroupper(Compile::current())) {
 4557       // Clear upper bits of YMM registers when the current compiled code uses
 4558       // wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
 4560       __ vzeroupper();
 4561     }
 4562     DEBUG_ONLY(int off1 = __ offset());
 4563     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4564   %}
 4565 
 4566   enc_class Java_To_Runtime(method meth) %{
 4567     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4568     __ call(r10);
 4569     __ post_call_nop();
 4570   %}
 4571 
 4572   enc_class Java_Static_Call(method meth)
 4573   %{
 4574     // JAVA STATIC CALL
 4575     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4576     // determine who we intended to call.
 4577     if (!_method) {
 4578       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4579     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4580       // The NOP here is purely to ensure that eliding a call to
 4581       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4582       __ addr_nop_5();
 4583       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4584     } else {
 4585       int method_index = resolved_method_index(masm);
 4586       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4587                                                   : static_call_Relocation::spec(method_index);
 4588       address mark = __ pc();
 4589       int call_offset = __ offset();
 4590       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4591       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4592         // Calls of the same statically bound method can share
 4593         // a stub to the interpreter.
 4594         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4595       } else {
 4596         // Emit stubs for static call.
 4597         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4598         __ clear_inst_mark();
 4599         if (stub == nullptr) {
 4600           ciEnv::current()->record_failure("CodeCache is full");
 4601           return;
 4602         }
 4603       }
 4604     }
 4605     __ post_call_nop();
 4606   %}
 4607 
 4608   enc_class Java_Dynamic_Call(method meth) %{
 4609     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4610     __ post_call_nop();
 4611   %}
 4612 
 4613   enc_class call_epilog %{
 4614     if (VerifyStackAtCalls) {
 4615       // Check that stack depth is unchanged: find majik cookie on stack
 4616       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4617       Label L;
 4618       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4619       __ jccb(Assembler::equal, L);
 4620       // Die if stack mismatch
 4621       __ int3();
 4622       __ bind(L);
 4623     }
 4624     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4625       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4626       // Search for the corresponding projection, get the register and emit code that initializes it.
 4627       uint con = (tf()->range_cc()->cnt() - 1);
 4628       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4629         ProjNode* proj = fast_out(i)->as_Proj();
 4630         if (proj->_con == con) {
 4631           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4632           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4633           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4634           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4635           __ testq(rax, rax);
 4636           __ setb(Assembler::notZero, toReg);
 4637           __ movzbl(toReg, toReg);
 4638           if (reg->is_stack()) {
 4639             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4640             __ movq(Address(rsp, st_off), toReg);
 4641           }
 4642           break;
 4643         }
 4644       }
 4645       if (return_value_is_used()) {
 4646         // An inline type is returned as fields in multiple registers.
 4647         // Rax either contains an oop if the inline type is buffered or a pointer
 4648         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4649         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4650         // rax &= (rax & 1) - 1
 4651         __ movptr(rscratch1, rax);
 4652         __ andptr(rscratch1, 0x1);
 4653         __ subptr(rscratch1, 0x1);
 4654         __ andptr(rax, rscratch1);
 4655       }
 4656     }
 4657   %}
 4658 
 4659 %}
 4660 
 4661 //----------FRAME--------------------------------------------------------------
 4662 // Definition of frame structure and management information.
 4663 //
 4664 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4665 //                             |   (to get allocators register number
 4666 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4667 //  r   CALLER     |        |
 4668 //  o     |        +--------+      pad to even-align allocators stack-slot
 4669 //  w     V        |  pad0  |        numbers; owned by CALLER
 4670 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4671 //  h     ^        |   in   |  5
 4672 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4673 //  |     |        |        |  3
 4674 //  |     |        +--------+
 4675 //  V     |        | old out|      Empty on Intel, window on Sparc
 4676 //        |    old |preserve|      Must be even aligned.
 4677 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4678 //        |        |   in   |  3   area for Intel ret address
 4679 //     Owned by    |preserve|      Empty on Sparc.
 4680 //       SELF      +--------+
 4681 //        |        |  pad2  |  2   pad to align old SP
 4682 //        |        +--------+  1
 4683 //        |        | locks  |  0
 4684 //        |        +--------+----> OptoReg::stack0(), even aligned
 4685 //        |        |  pad1  | 11   pad to align new SP
 4686 //        |        +--------+
 4687 //        |        |        | 10
 4688 //        |        | spills |  9   spills
 4689 //        V        |        |  8   (pad0 slot for callee)
 4690 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4691 //        ^        |  out   |  7
 4692 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4693 //     Owned by    +--------+
 4694 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4695 //        |    new |preserve|      Must be even-aligned.
 4696 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4697 //        |        |        |
 4698 //
 4699 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4700 //         known from SELF's arguments and the Java calling convention.
 4701 //         Region 6-7 is determined per call site.
 4702 // Note 2: If the calling convention leaves holes in the incoming argument
 4703 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4704 //         are owned by the CALLEE.  Holes should not be necessary in the
 4705 //         incoming area, as the Java calling convention is completely under
 4706 //         the control of the AD file.  Doubles can be sorted and packed to
 4707 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4708 //         varargs C calling conventions.
 4709 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4710 //         even aligned with pad0 as needed.
 4711 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4712 //         region 6-11 is even aligned; it may be padded out more so that
 4713 //         the region from SP to FP meets the minimum stack alignment.
 4714 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4715 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4716 //         SP meets the minimum alignment.
 4717 
 4718 frame
 4719 %{
 4720   // These three registers define part of the calling convention
 4721   // between compiled code and the interpreter.
 4722   inline_cache_reg(RAX);                // Inline Cache Register
 4723 
 4724   // Optional: name the operand used by cisc-spilling to access
 4725   // [stack_pointer + offset]
 4726   cisc_spilling_operand_name(indOffset32);
 4727 
 4728   // Number of stack slots consumed by locking an object
 4729   sync_stack_slots(2);
 4730 
 4731   // Compiled code's Frame Pointer
 4732   frame_pointer(RSP);
 4733 
 4734   // Stack alignment requirement
 4735   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4736 
 4737   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4738   // for calls to C.  Supports the var-args backing area for register parms.
 4739   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4740 
 4741   // The after-PROLOG location of the return address.  Location of
 4742   // return address specifies a type (REG or STACK) and a number
 4743   // representing the register number (i.e. - use a register name) or
 4744   // stack slot.
 4745   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4746   // Otherwise, it is above the locks and verification slot and the alignment word.
 4747   return_addr(STACK - 2 +
 4748               align_up((Compile::current()->in_preserve_stack_slots() +
 4749                         Compile::current()->fixed_slots()),
 4750                        stack_alignment_in_slots()));
 4751 
 4752   // Location of compiled Java return values.  Same as C for now.
 4753   return_value
 4754   %{
 4755     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4756            "only return normal values");
 4757 
 4758     static const int lo[Op_RegL + 1] = {
 4759       0,
 4760       0,
 4761       RAX_num,  // Op_RegN
 4762       RAX_num,  // Op_RegI
 4763       RAX_num,  // Op_RegP
 4764       XMM0_num, // Op_RegF
 4765       XMM0_num, // Op_RegD
 4766       RAX_num   // Op_RegL
 4767     };
 4768     static const int hi[Op_RegL + 1] = {
 4769       0,
 4770       0,
 4771       OptoReg::Bad, // Op_RegN
 4772       OptoReg::Bad, // Op_RegI
 4773       RAX_H_num,    // Op_RegP
 4774       OptoReg::Bad, // Op_RegF
 4775       XMM0b_num,    // Op_RegD
 4776       RAX_H_num     // Op_RegL
 4777     };
 4778     // Excluded flags and vector registers.
 4779     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4780     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4781   %}
 4782 %}
 4783 
 4784 //----------ATTRIBUTES---------------------------------------------------------
 4785 //----------Operand Attributes-------------------------------------------------
 4786 op_attrib op_cost(0);        // Required cost attribute
 4787 
 4788 //----------Instruction Attributes---------------------------------------------
 4789 ins_attrib ins_cost(100);       // Required cost attribute
 4790 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4791 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4792                                 // a non-matching short branch variant
 4793                                 // of some long branch?
 4794 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4795                                 // be a power of 2) specifies the
 4796                                 // alignment that some part of the
 4797                                 // instruction (not necessarily the
 4798                                 // start) requires.  If > 1, a
 4799                                 // compute_padding() function must be
 4800                                 // provided for the instruction
 4801 
 4802 // Whether this node is expanded during code emission into a sequence of
 4803 // instructions and the first instruction can perform an implicit null check.
 4804 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4805 
 4806 //----------OPERANDS-----------------------------------------------------------
 4807 // Operand definitions must precede instruction definitions for correct parsing
 4808 // in the ADLC because operands constitute user defined types which are used in
 4809 // instruction definitions.
 4810 
 4811 //----------Simple Operands----------------------------------------------------
 4812 // Immediate Operands
 4813 // Integer Immediate
 4814 operand immI()
 4815 %{
 4816   match(ConI);
 4817 
 4818   op_cost(10);
 4819   format %{ %}
 4820   interface(CONST_INTER);
 4821 %}
 4822 
 4823 // Constant for test vs zero
 4824 operand immI_0()
 4825 %{
 4826   predicate(n->get_int() == 0);
 4827   match(ConI);
 4828 
 4829   op_cost(0);
 4830   format %{ %}
 4831   interface(CONST_INTER);
 4832 %}
 4833 
 4834 // Constant for increment
 4835 operand immI_1()
 4836 %{
 4837   predicate(n->get_int() == 1);
 4838   match(ConI);
 4839 
 4840   op_cost(0);
 4841   format %{ %}
 4842   interface(CONST_INTER);
 4843 %}
 4844 
 4845 // Constant for decrement
 4846 operand immI_M1()
 4847 %{
 4848   predicate(n->get_int() == -1);
 4849   match(ConI);
 4850 
 4851   op_cost(0);
 4852   format %{ %}
 4853   interface(CONST_INTER);
 4854 %}
 4855 
 4856 operand immI_2()
 4857 %{
 4858   predicate(n->get_int() == 2);
 4859   match(ConI);
 4860 
 4861   op_cost(0);
 4862   format %{ %}
 4863   interface(CONST_INTER);
 4864 %}
 4865 
 4866 operand immI_4()
 4867 %{
 4868   predicate(n->get_int() == 4);
 4869   match(ConI);
 4870 
 4871   op_cost(0);
 4872   format %{ %}
 4873   interface(CONST_INTER);
 4874 %}
 4875 
 4876 operand immI_8()
 4877 %{
 4878   predicate(n->get_int() == 8);
 4879   match(ConI);
 4880 
 4881   op_cost(0);
 4882   format %{ %}
 4883   interface(CONST_INTER);
 4884 %}
 4885 
 4886 // Valid scale values for addressing modes (scale factor 2^n, i.e. *1, *2, *4, *8)
 4887 operand immI2()
 4888 %{
 4889   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4890   match(ConI);
 4891 
 4892   format %{ %}
 4893   interface(CONST_INTER);
 4894 %}
 4895 
 4896 operand immU7()
 4897 %{
 4898   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4899   match(ConI);
 4900 
 4901   op_cost(5);
 4902   format %{ %}
 4903   interface(CONST_INTER);
 4904 %}
 4905 
 4906 operand immI8()
 4907 %{
 4908   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4909   match(ConI);
 4910 
 4911   op_cost(5);
 4912   format %{ %}
 4913   interface(CONST_INTER);
 4914 %}
 4915 
 4916 operand immU8()
 4917 %{
 4918   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4919   match(ConI);
 4920 
 4921   op_cost(5);
 4922   format %{ %}
 4923   interface(CONST_INTER);
 4924 %}
 4925 
 4926 operand immI16()
 4927 %{
 4928   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4929   match(ConI);
 4930 
 4931   op_cost(10);
 4932   format %{ %}
 4933   interface(CONST_INTER);
 4934 %}
 4935 
 4936 // Int Immediate non-negative
 4937 operand immU31()
 4938 %{
 4939   predicate(n->get_int() >= 0);
 4940   match(ConI);
 4941 
 4942   op_cost(0);
 4943   format %{ %}
 4944   interface(CONST_INTER);
 4945 %}
 4946 
 4947 // Pointer Immediate
 4948 operand immP()
 4949 %{
 4950   match(ConP);
 4951 
 4952   op_cost(10);
 4953   format %{ %}
 4954   interface(CONST_INTER);
 4955 %}
 4956 
 4957 // Null Pointer Immediate
 4958 operand immP0()
 4959 %{
 4960   predicate(n->get_ptr() == 0);
 4961   match(ConP);
 4962 
 4963   op_cost(5);
 4964   format %{ %}
 4965   interface(CONST_INTER);
 4966 %}
 4967 
 4968 // Pointer Immediate
 4969 operand immN() %{
 4970   match(ConN);
 4971 
 4972   op_cost(10);
 4973   format %{ %}
 4974   interface(CONST_INTER);
 4975 %}
 4976 
 4977 operand immNKlass() %{
 4978   match(ConNKlass);
 4979 
 4980   op_cost(10);
 4981   format %{ %}
 4982   interface(CONST_INTER);
 4983 %}
 4984 
 4985 // Null Pointer Immediate
 4986 operand immN0() %{
 4987   predicate(n->get_narrowcon() == 0);
 4988   match(ConN);
 4989 
 4990   op_cost(5);
 4991   format %{ %}
 4992   interface(CONST_INTER);
 4993 %}
 4994 
 4995 operand immP31()
 4996 %{
 4997   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4998             && (n->get_ptr() >> 31) == 0);
 4999   match(ConP);
 5000 
 5001   op_cost(5);
 5002   format %{ %}
 5003   interface(CONST_INTER);
 5004 %}
 5005 
 5006 
 5007 // Long Immediate
 5008 operand immL()
 5009 %{
 5010   match(ConL);
 5011 
 5012   op_cost(20);
 5013   format %{ %}
 5014   interface(CONST_INTER);
 5015 %}
 5016 
 5017 // Long Immediate 8-bit
 5018 operand immL8()
 5019 %{
 5020   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 5021   match(ConL);
 5022 
 5023   op_cost(5);
 5024   format %{ %}
 5025   interface(CONST_INTER);
 5026 %}
 5027 
 5028 // Long Immediate 32-bit unsigned
 5029 operand immUL32()
 5030 %{
 5031   predicate(n->get_long() == (unsigned int) (n->get_long()));
 5032   match(ConL);
 5033 
 5034   op_cost(10);
 5035   format %{ %}
 5036   interface(CONST_INTER);
 5037 %}
 5038 
 5039 // Long Immediate 32-bit signed
 5040 operand immL32()
 5041 %{
 5042   predicate(n->get_long() == (int) (n->get_long()));
 5043   match(ConL);
 5044 
 5045   op_cost(15);
 5046   format %{ %}
 5047   interface(CONST_INTER);
 5048 %}
 5049 
 5050 operand immL_Pow2()
 5051 %{
 5052   predicate(is_power_of_2((julong)n->get_long()));
 5053   match(ConL);
 5054 
 5055   op_cost(15);
 5056   format %{ %}
 5057   interface(CONST_INTER);
 5058 %}
 5059 
 5060 operand immL_NotPow2()
 5061 %{
 5062   predicate(is_power_of_2((julong)~n->get_long()));
 5063   match(ConL);
 5064 
 5065   op_cost(15);
 5066   format %{ %}
 5067   interface(CONST_INTER);
 5068 %}
 5069 
 5070 // Long Immediate zero
 5071 operand immL0()
 5072 %{
 5073   predicate(n->get_long() == 0L);
 5074   match(ConL);
 5075 
 5076   op_cost(10);
 5077   format %{ %}
 5078   interface(CONST_INTER);
 5079 %}
 5080 
 5081 // Constant for increment
 5082 operand immL1()
 5083 %{
 5084   predicate(n->get_long() == 1);
 5085   match(ConL);
 5086 
 5087   format %{ %}
 5088   interface(CONST_INTER);
 5089 %}
 5090 
 5091 // Constant for decrement
 5092 operand immL_M1()
 5093 %{
 5094   predicate(n->get_long() == -1);
 5095   match(ConL);
 5096 
 5097   format %{ %}
 5098   interface(CONST_INTER);
 5099 %}
 5100 
 5101 // Long Immediate: low 32-bit mask
 5102 operand immL_32bits()
 5103 %{
 5104   predicate(n->get_long() == 0xFFFFFFFFL);
 5105   match(ConL);
 5106   op_cost(20);
 5107 
 5108   format %{ %}
 5109   interface(CONST_INTER);
 5110 %}
 5111 
 5112 // Int Immediate: 2^n-1, positive
 5113 operand immI_Pow2M1()
 5114 %{
 5115   predicate((n->get_int() > 0)
 5116             && is_power_of_2((juint)n->get_int() + 1));
 5117   match(ConI);
 5118 
 5119   op_cost(20);
 5120   format %{ %}
 5121   interface(CONST_INTER);
 5122 %}
 5123 
 5124 // Float Immediate zero
 5125 operand immF0()
 5126 %{
 5127   predicate(jint_cast(n->getf()) == 0);
 5128   match(ConF);
 5129 
 5130   op_cost(5);
 5131   format %{ %}
 5132   interface(CONST_INTER);
 5133 %}
 5134 
 5135 // Float Immediate
 5136 operand immF()
 5137 %{
 5138   match(ConF);
 5139 
 5140   op_cost(15);
 5141   format %{ %}
 5142   interface(CONST_INTER);
 5143 %}
 5144 
 5145 // Half Float Immediate
 5146 operand immH()
 5147 %{
 5148   match(ConH);
 5149 
 5150   op_cost(15);
 5151   format %{ %}
 5152   interface(CONST_INTER);
 5153 %}
 5154 
 5155 // Double Immediate zero
 5156 operand immD0()
 5157 %{
 5158   predicate(jlong_cast(n->getd()) == 0);
 5159   match(ConD);
 5160 
 5161   op_cost(5);
 5162   format %{ %}
 5163   interface(CONST_INTER);
 5164 %}
 5165 
 5166 // Double Immediate
 5167 operand immD()
 5168 %{
 5169   match(ConD);
 5170 
 5171   op_cost(15);
 5172   format %{ %}
 5173   interface(CONST_INTER);
 5174 %}
 5175 
 5176 // Immediates for special shifts (sign extend)
 5177 
 5178 // Constants for increment
 5179 operand immI_16()
 5180 %{
 5181   predicate(n->get_int() == 16);
 5182   match(ConI);
 5183 
 5184   format %{ %}
 5185   interface(CONST_INTER);
 5186 %}
 5187 
 5188 operand immI_24()
 5189 %{
 5190   predicate(n->get_int() == 24);
 5191   match(ConI);
 5192 
 5193   format %{ %}
 5194   interface(CONST_INTER);
 5195 %}
 5196 
 5197 // Constant for byte-wide masking
 5198 operand immI_255()
 5199 %{
 5200   predicate(n->get_int() == 255);
 5201   match(ConI);
 5202 
 5203   format %{ %}
 5204   interface(CONST_INTER);
 5205 %}
 5206 
 5207 // Constant for short-wide masking
 5208 operand immI_65535()
 5209 %{
 5210   predicate(n->get_int() == 65535);
 5211   match(ConI);
 5212 
 5213   format %{ %}
 5214   interface(CONST_INTER);
 5215 %}
 5216 
 5217 // Constant for byte-wide masking
 5218 operand immL_255()
 5219 %{
 5220   predicate(n->get_long() == 255);
 5221   match(ConL);
 5222 
 5223   format %{ %}
 5224   interface(CONST_INTER);
 5225 %}
 5226 
 5227 // Constant for short-wide masking
 5228 operand immL_65535()
 5229 %{
 5230   predicate(n->get_long() == 65535);
 5231   match(ConL);
 5232 
 5233   format %{ %}
 5234   interface(CONST_INTER);
 5235 %}
 5236 
 5237 // AOT Runtime Constants Address
 5238 operand immAOTRuntimeConstantsAddress()
 5239 %{
 5240   // Check if the address is in the range of AOT Runtime Constants
 5241   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5242   match(ConP);
 5243 
 5244   op_cost(0);
 5245   format %{ %}
 5246   interface(CONST_INTER);
 5247 %}
 5248 
 5249 operand kReg()
 5250 %{
 5251   constraint(ALLOC_IN_RC(vectmask_reg));
 5252   match(RegVectMask);
 5253   format %{%}
 5254   interface(REG_INTER);
 5255 %}
 5256 
 5257 // Register Operands
 5258 // Integer Register
 5259 operand rRegI()
 5260 %{
 5261   constraint(ALLOC_IN_RC(int_reg));
 5262   match(RegI);
 5263 
 5264   match(rax_RegI);
 5265   match(rbx_RegI);
 5266   match(rcx_RegI);
 5267   match(rdx_RegI);
 5268   match(rdi_RegI);
 5269 
 5270   format %{ %}
 5271   interface(REG_INTER);
 5272 %}
 5273 
 5274 // Special Registers
 5275 operand rax_RegI()
 5276 %{
 5277   constraint(ALLOC_IN_RC(int_rax_reg));
 5278   match(RegI);
 5279   match(rRegI);
 5280 
 5281   format %{ "RAX" %}
 5282   interface(REG_INTER);
 5283 %}
 5284 
 5285 // Special Registers
 5286 operand rbx_RegI()
 5287 %{
 5288   constraint(ALLOC_IN_RC(int_rbx_reg));
 5289   match(RegI);
 5290   match(rRegI);
 5291 
 5292   format %{ "RBX" %}
 5293   interface(REG_INTER);
 5294 %}
 5295 
 5296 operand rcx_RegI()
 5297 %{
 5298   constraint(ALLOC_IN_RC(int_rcx_reg));
 5299   match(RegI);
 5300   match(rRegI);
 5301 
 5302   format %{ "RCX" %}
 5303   interface(REG_INTER);
 5304 %}
 5305 
 5306 operand rdx_RegI()
 5307 %{
 5308   constraint(ALLOC_IN_RC(int_rdx_reg));
 5309   match(RegI);
 5310   match(rRegI);
 5311 
 5312   format %{ "RDX" %}
 5313   interface(REG_INTER);
 5314 %}
 5315 
 5316 operand rdi_RegI()
 5317 %{
 5318   constraint(ALLOC_IN_RC(int_rdi_reg));
 5319   match(RegI);
 5320   match(rRegI);
 5321 
 5322   format %{ "RDI" %}
 5323   interface(REG_INTER);
 5324 %}
 5325 
 5326 operand no_rax_rdx_RegI()
 5327 %{
 5328   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5329   match(RegI);
 5330   match(rbx_RegI);
 5331   match(rcx_RegI);
 5332   match(rdi_RegI);
 5333 
 5334   format %{ %}
 5335   interface(REG_INTER);
 5336 %}
 5337 
 5338 operand no_rbp_r13_RegI()
 5339 %{
 5340   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5341   match(RegI);
 5342   match(rRegI);
 5343   match(rax_RegI);
 5344   match(rbx_RegI);
 5345   match(rcx_RegI);
 5346   match(rdx_RegI);
 5347   match(rdi_RegI);
 5348 
 5349   format %{ %}
 5350   interface(REG_INTER);
 5351 %}
 5352 
 5353 // Pointer Register
 5354 operand any_RegP()
 5355 %{
 5356   constraint(ALLOC_IN_RC(any_reg));
 5357   match(RegP);
 5358   match(rax_RegP);
 5359   match(rbx_RegP);
 5360   match(rdi_RegP);
 5361   match(rsi_RegP);
 5362   match(rbp_RegP);
 5363   match(r15_RegP);
 5364   match(rRegP);
 5365 
 5366   format %{ %}
 5367   interface(REG_INTER);
 5368 %}
 5369 
 5370 operand rRegP()
 5371 %{
 5372   constraint(ALLOC_IN_RC(ptr_reg));
 5373   match(RegP);
 5374   match(rax_RegP);
 5375   match(rbx_RegP);
 5376   match(rdi_RegP);
 5377   match(rsi_RegP);
 5378   match(rbp_RegP);  // See Q&A below about
 5379   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5380 
 5381   format %{ %}
 5382   interface(REG_INTER);
 5383 %}
 5384 
 5385 operand rRegN() %{
 5386   constraint(ALLOC_IN_RC(int_reg));
 5387   match(RegN);
 5388 
 5389   format %{ %}
 5390   interface(REG_INTER);
 5391 %}
 5392 
 5393 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5394 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5395 // It's fine for an instruction input that expects rRegP to match an r15_RegP.
 5396 // The output of an instruction is controlled by the allocator, which respects
 5397 // register class masks, not match rules.  Unless an instruction mentions
 5398 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 5399 // by the allocator when assigning that output.
 5400 // The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
 5401 // RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5402 // result, RBP is never chosen as the output of the instruction either.
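//
// Illustrative example (a sketch, not an additional rule): an instruction whose
// input operand is declared rRegP may still see r15 arrive at that input, e.g.
// when addressing off the TLS pointer, because the DFA only consults the match
// rules. The same instruction's rRegP result, however, is allocated out of
// ptr_reg, which excludes r15 (and RBP when it serves as the frame pointer),
// so neither register is ever produced as a definition.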
 5403 
 5404 // This operand is not allowed to use RBP even if
 5405 // RBP is not used to hold the frame pointer.
 5406 operand no_rbp_RegP()
 5407 %{
 5408   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5409   match(RegP);
 5410   match(rbx_RegP);
 5411   match(rsi_RegP);
 5412   match(rdi_RegP);
 5413 
 5414   format %{ %}
 5415   interface(REG_INTER);
 5416 %}
 5417 
 5418 // Special Registers
 5419 // Return a pointer value
 5420 operand rax_RegP()
 5421 %{
 5422   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5423   match(RegP);
 5424   match(rRegP);
 5425 
 5426   format %{ %}
 5427   interface(REG_INTER);
 5428 %}
 5429 
 5430 // Special Registers
 5431 // Return a compressed pointer value
 5432 operand rax_RegN()
 5433 %{
 5434   constraint(ALLOC_IN_RC(int_rax_reg));
 5435   match(RegN);
 5436   match(rRegN);
 5437 
 5438   format %{ %}
 5439   interface(REG_INTER);
 5440 %}
 5441 
 5442 // Used in AtomicAdd
 5443 operand rbx_RegP()
 5444 %{
 5445   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5446   match(RegP);
 5447   match(rRegP);
 5448 
 5449   format %{ %}
 5450   interface(REG_INTER);
 5451 %}
 5452 
 5453 operand rsi_RegP()
 5454 %{
 5455   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5456   match(RegP);
 5457   match(rRegP);
 5458 
 5459   format %{ %}
 5460   interface(REG_INTER);
 5461 %}
 5462 
 5463 operand rbp_RegP()
 5464 %{
 5465   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5466   match(RegP);
 5467   match(rRegP);
 5468 
 5469   format %{ %}
 5470   interface(REG_INTER);
 5471 %}
 5472 
 5473 // Used in rep stosq
 5474 operand rdi_RegP()
 5475 %{
 5476   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5477   match(RegP);
 5478   match(rRegP);
 5479 
 5480   format %{ %}
 5481   interface(REG_INTER);
 5482 %}
 5483 
 5484 operand r15_RegP()
 5485 %{
 5486   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5487   match(RegP);
 5488   match(rRegP);
 5489 
 5490   format %{ %}
 5491   interface(REG_INTER);
 5492 %}
 5493 
 5494 operand rRegL()
 5495 %{
 5496   constraint(ALLOC_IN_RC(long_reg));
 5497   match(RegL);
 5498   match(rax_RegL);
 5499   match(rdx_RegL);
 5500 
 5501   format %{ %}
 5502   interface(REG_INTER);
 5503 %}
 5504 
 5505 // Special Registers
 5506 operand no_rax_rdx_RegL()
 5507 %{
 5508   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5509   match(RegL);
 5510   match(rRegL);
 5511 
 5512   format %{ %}
 5513   interface(REG_INTER);
 5514 %}
 5515 
 5516 operand rax_RegL()
 5517 %{
 5518   constraint(ALLOC_IN_RC(long_rax_reg));
 5519   match(RegL);
 5520   match(rRegL);
 5521 
 5522   format %{ "RAX" %}
 5523   interface(REG_INTER);
 5524 %}
 5525 
 5526 operand rcx_RegL()
 5527 %{
 5528   constraint(ALLOC_IN_RC(long_rcx_reg));
 5529   match(RegL);
 5530   match(rRegL);
 5531 
 5532   format %{ %}
 5533   interface(REG_INTER);
 5534 %}
 5535 
 5536 operand rdx_RegL()
 5537 %{
 5538   constraint(ALLOC_IN_RC(long_rdx_reg));
 5539   match(RegL);
 5540   match(rRegL);
 5541 
 5542   format %{ %}
 5543   interface(REG_INTER);
 5544 %}
 5545 
 5546 operand r11_RegL()
 5547 %{
 5548   constraint(ALLOC_IN_RC(long_r11_reg));
 5549   match(RegL);
 5550   match(rRegL);
 5551 
 5552   format %{ %}
 5553   interface(REG_INTER);
 5554 %}
 5555 
 5556 operand no_rbp_r13_RegL()
 5557 %{
 5558   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5559   match(RegL);
 5560   match(rRegL);
 5561   match(rax_RegL);
 5562   match(rcx_RegL);
 5563   match(rdx_RegL);
 5564 
 5565   format %{ %}
 5566   interface(REG_INTER);
 5567 %}
 5568 
 5569 // Flags register, used as output of compare instructions
 5570 operand rFlagsReg()
 5571 %{
 5572   constraint(ALLOC_IN_RC(int_flags));
 5573   match(RegFlags);
 5574 
 5575   format %{ "RFLAGS" %}
 5576   interface(REG_INTER);
 5577 %}
 5578 
 5579 // Flags register, used as output of FLOATING POINT compare instructions
 5580 operand rFlagsRegU()
 5581 %{
 5582   constraint(ALLOC_IN_RC(int_flags));
 5583   match(RegFlags);
 5584 
 5585   format %{ "RFLAGS_U" %}
 5586   interface(REG_INTER);
 5587 %}
 5588 
 5589 operand rFlagsRegUCF() %{
 5590   constraint(ALLOC_IN_RC(int_flags));
 5591   match(RegFlags);
 5592   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5593 
 5594   format %{ "RFLAGS_U_CF" %}
 5595   interface(REG_INTER);
 5596 %}
 5597 
 5598 operand rFlagsRegUCFE() %{
 5599   constraint(ALLOC_IN_RC(int_flags));
 5600   match(RegFlags);
 5601   predicate(UseAPX && VM_Version::supports_avx10_2());
 5602 
 5603   format %{ "RFLAGS_U_CFE" %}
 5604   interface(REG_INTER);
 5605 %}
 5606 
 5607 // Float register operands
 5608 operand regF() %{
 5609    constraint(ALLOC_IN_RC(float_reg));
 5610    match(RegF);
 5611 
 5612    format %{ %}
 5613    interface(REG_INTER);
 5614 %}
 5615 
 5616 // Float register operands
 5617 operand legRegF() %{
 5618    constraint(ALLOC_IN_RC(float_reg_legacy));
 5619    match(RegF);
 5620 
 5621    format %{ %}
 5622    interface(REG_INTER);
 5623 %}
 5624 
 5625 // Float register operands
 5626 operand vlRegF() %{
 5627    constraint(ALLOC_IN_RC(float_reg_vl));
 5628    match(RegF);
 5629 
 5630    format %{ %}
 5631    interface(REG_INTER);
 5632 %}
 5633 
 5634 // Double register operands
 5635 operand regD() %{
 5636    constraint(ALLOC_IN_RC(double_reg));
 5637    match(RegD);
 5638 
 5639    format %{ %}
 5640    interface(REG_INTER);
 5641 %}
 5642 
 5643 // Double register operands
 5644 operand legRegD() %{
 5645    constraint(ALLOC_IN_RC(double_reg_legacy));
 5646    match(RegD);
 5647 
 5648    format %{ %}
 5649    interface(REG_INTER);
 5650 %}
 5651 
 5652 // Double register operands
 5653 operand vlRegD() %{
 5654    constraint(ALLOC_IN_RC(double_reg_vl));
 5655    match(RegD);
 5656 
 5657    format %{ %}
 5658    interface(REG_INTER);
 5659 %}
 5660 
 5661 //----------Memory Operands----------------------------------------------------
 5662 // Direct Memory Operand
 5663 // operand direct(immP addr)
 5664 // %{
 5665 //   match(addr);
 5666 
 5667 //   format %{ "[$addr]" %}
 5668 //   interface(MEMORY_INTER) %{
 5669 //     base(0xFFFFFFFF);
 5670 //     index(0x4);
 5671 //     scale(0x0);
 5672 //     disp($addr);
 5673 //   %}
 5674 // %}
 5675 
 5676 // Indirect Memory Operand
 5677 operand indirect(any_RegP reg)
 5678 %{
 5679   constraint(ALLOC_IN_RC(ptr_reg));
 5680   match(reg);
 5681 
 5682   format %{ "[$reg]" %}
 5683   interface(MEMORY_INTER) %{
 5684     base($reg);
 5685     index(0x4);
 5686     scale(0x0);
 5687     disp(0x0);
 5688   %}
 5689 %}
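// Note on the MEMORY_INTER encodings above and below: an index value of 0x4 is
// RSP's encoding, which cannot name an index register in a SIB byte, so
// index(0x4) means "no index"; such operands describe base-only or
// base+displacement addresses.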
 5690 
 5691 // Indirect Memory Plus Short Offset Operand
 5692 operand indOffset8(any_RegP reg, immL8 off)
 5693 %{
 5694   constraint(ALLOC_IN_RC(ptr_reg));
 5695   match(AddP reg off);
 5696 
 5697   format %{ "[$reg + $off (8-bit)]" %}
 5698   interface(MEMORY_INTER) %{
 5699     base($reg);
 5700     index(0x4);
 5701     scale(0x0);
 5702     disp($off);
 5703   %}
 5704 %}
 5705 
 5706 // Indirect Memory Plus Long Offset Operand
 5707 operand indOffset32(any_RegP reg, immL32 off)
 5708 %{
 5709   constraint(ALLOC_IN_RC(ptr_reg));
 5710   match(AddP reg off);
 5711 
 5712   format %{ "[$reg + $off (32-bit)]" %}
 5713   interface(MEMORY_INTER) %{
 5714     base($reg);
 5715     index(0x4);
 5716     scale(0x0);
 5717     disp($off);
 5718   %}
 5719 %}
 5720 
 5721 // Indirect Memory Plus Index Register Plus Offset Operand
 5722 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5723 %{
 5724   constraint(ALLOC_IN_RC(ptr_reg));
 5725   match(AddP (AddP reg lreg) off);
 5726 
 5727   op_cost(10);
 5728   format %{"[$reg + $off + $lreg]" %}
 5729   interface(MEMORY_INTER) %{
 5730     base($reg);
 5731     index($lreg);
 5732     scale(0x0);
 5733     disp($off);
 5734   %}
 5735 %}
 5736 
 5737 // Indirect Memory Plus Index Register Plus Offset Operand
 5738 operand indIndex(any_RegP reg, rRegL lreg)
 5739 %{
 5740   constraint(ALLOC_IN_RC(ptr_reg));
 5741   match(AddP reg lreg);
 5742 
 5743   op_cost(10);
 5744   format %{"[$reg + $lreg]" %}
 5745   interface(MEMORY_INTER) %{
 5746     base($reg);
 5747     index($lreg);
 5748     scale(0x0);
 5749     disp(0x0);
 5750   %}
 5751 %}
 5752 
 5753 // Indirect Memory Times Scale Plus Index Register
 5754 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5755 %{
 5756   constraint(ALLOC_IN_RC(ptr_reg));
 5757   match(AddP reg (LShiftL lreg scale));
 5758 
 5759   op_cost(10);
 5760   format %{"[$reg + $lreg << $scale]" %}
 5761   interface(MEMORY_INTER) %{
 5762     base($reg);
 5763     index($lreg);
 5764     scale($scale);
 5765     disp(0x0);
 5766   %}
 5767 %}
 5768 
 5769 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5770 %{
 5771   constraint(ALLOC_IN_RC(ptr_reg));
 5772   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5773   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5774 
 5775   op_cost(10);
 5776   format %{"[$reg + pos $idx << $scale]" %}
 5777   interface(MEMORY_INTER) %{
 5778     base($reg);
 5779     index($idx);
 5780     scale($scale);
 5781     disp(0x0);
 5782   %}
 5783 %}
 5784 
 5785 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5786 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5787 %{
 5788   constraint(ALLOC_IN_RC(ptr_reg));
 5789   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5790 
 5791   op_cost(10);
 5792   format %{"[$reg + $off + $lreg << $scale]" %}
 5793   interface(MEMORY_INTER) %{
 5794     base($reg);
 5795     index($lreg);
 5796     scale($scale);
 5797     disp($off);
 5798   %}
 5799 %}
 5800 
 5801 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5802 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5803 %{
 5804   constraint(ALLOC_IN_RC(ptr_reg));
 5805   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5806   match(AddP (AddP reg (ConvI2L idx)) off);
 5807 
 5808   op_cost(10);
 5809   format %{"[$reg + $off + $idx]" %}
 5810   interface(MEMORY_INTER) %{
 5811     base($reg);
 5812     index($idx);
 5813     scale(0x0);
 5814     disp($off);
 5815   %}
 5816 %}
 5817 
 5818 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5819 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5820 %{
 5821   constraint(ALLOC_IN_RC(ptr_reg));
 5822   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5823   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5824 
 5825   op_cost(10);
 5826   format %{"[$reg + $off + $idx << $scale]" %}
 5827   interface(MEMORY_INTER) %{
 5828     base($reg);
 5829     index($idx);
 5830     scale($scale);
 5831     disp($off);
 5832   %}
 5833 %}
 5834 
 5835 // Indirect Narrow Oop Operand
 5836 operand indCompressedOop(rRegN reg) %{
 5837   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5838   constraint(ALLOC_IN_RC(ptr_reg));
 5839   match(DecodeN reg);
 5840 
 5841   op_cost(10);
 5842   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5843   interface(MEMORY_INTER) %{
 5844     base(0xc); // R12
 5845     index($reg);
 5846     scale(0x3);
 5847     disp(0x0);
 5848   %}
 5849 %}
 5850 
 5851 // Indirect Narrow Oop Plus Offset Operand
 5852 // Note: the x86 architecture doesn't support "scale * index + offset" without a base
 5853 // register, so we can't free R12 even with CompressedOops::base() == nullptr.
 5854 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5855   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5856   constraint(ALLOC_IN_RC(ptr_reg));
 5857   match(AddP (DecodeN reg) off);
 5858 
 5859   op_cost(10);
 5860   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5861   interface(MEMORY_INTER) %{
 5862     base(0xc); // R12
 5863     index($reg);
 5864     scale(0x3);
 5865     disp($off);
 5866   %}
 5867 %}
 5868 
 5869 // Indirect Memory Operand
 5870 operand indirectNarrow(rRegN reg)
 5871 %{
 5872   predicate(CompressedOops::shift() == 0);
 5873   constraint(ALLOC_IN_RC(ptr_reg));
 5874   match(DecodeN reg);
 5875 
 5876   format %{ "[$reg]" %}
 5877   interface(MEMORY_INTER) %{
 5878     base($reg);
 5879     index(0x4);
 5880     scale(0x0);
 5881     disp(0x0);
 5882   %}
 5883 %}
 5884 
 5885 // Indirect Memory Plus Short Offset Operand
 5886 operand indOffset8Narrow(rRegN reg, immL8 off)
 5887 %{
 5888   predicate(CompressedOops::shift() == 0);
 5889   constraint(ALLOC_IN_RC(ptr_reg));
 5890   match(AddP (DecodeN reg) off);
 5891 
 5892   format %{ "[$reg + $off (8-bit)]" %}
 5893   interface(MEMORY_INTER) %{
 5894     base($reg);
 5895     index(0x4);
 5896     scale(0x0);
 5897     disp($off);
 5898   %}
 5899 %}
 5900 
 5901 // Indirect Memory Plus Long Offset Operand
 5902 operand indOffset32Narrow(rRegN reg, immL32 off)
 5903 %{
 5904   predicate(CompressedOops::shift() == 0);
 5905   constraint(ALLOC_IN_RC(ptr_reg));
 5906   match(AddP (DecodeN reg) off);
 5907 
 5908   format %{ "[$reg + $off (32-bit)]" %}
 5909   interface(MEMORY_INTER) %{
 5910     base($reg);
 5911     index(0x4);
 5912     scale(0x0);
 5913     disp($off);
 5914   %}
 5915 %}
 5916 
 5917 // Indirect Memory Plus Index Register Plus Offset Operand
 5918 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5919 %{
 5920   predicate(CompressedOops::shift() == 0);
 5921   constraint(ALLOC_IN_RC(ptr_reg));
 5922   match(AddP (AddP (DecodeN reg) lreg) off);
 5923 
 5924   op_cost(10);
 5925   format %{"[$reg + $off + $lreg]" %}
 5926   interface(MEMORY_INTER) %{
 5927     base($reg);
 5928     index($lreg);
 5929     scale(0x0);
 5930     disp($off);
 5931   %}
 5932 %}
 5933 
 5934 // Indirect Memory Plus Index Register Plus Offset Operand
 5935 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5936 %{
 5937   predicate(CompressedOops::shift() == 0);
 5938   constraint(ALLOC_IN_RC(ptr_reg));
 5939   match(AddP (DecodeN reg) lreg);
 5940 
 5941   op_cost(10);
 5942   format %{"[$reg + $lreg]" %}
 5943   interface(MEMORY_INTER) %{
 5944     base($reg);
 5945     index($lreg);
 5946     scale(0x0);
 5947     disp(0x0);
 5948   %}
 5949 %}
 5950 
 5951 // Indirect Memory Times Scale Plus Index Register
 5952 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5953 %{
 5954   predicate(CompressedOops::shift() == 0);
 5955   constraint(ALLOC_IN_RC(ptr_reg));
 5956   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5957 
 5958   op_cost(10);
 5959   format %{"[$reg + $lreg << $scale]" %}
 5960   interface(MEMORY_INTER) %{
 5961     base($reg);
 5962     index($lreg);
 5963     scale($scale);
 5964     disp(0x0);
 5965   %}
 5966 %}
 5967 
 5968 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5969 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5970 %{
 5971   predicate(CompressedOops::shift() == 0);
 5972   constraint(ALLOC_IN_RC(ptr_reg));
 5973   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5974 
 5975   op_cost(10);
 5976   format %{"[$reg + $off + $lreg << $scale]" %}
 5977   interface(MEMORY_INTER) %{
 5978     base($reg);
 5979     index($lreg);
 5980     scale($scale);
 5981     disp($off);
 5982   %}
 5983 %}
 5984 
 5985 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5986 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5987 %{
 5988   constraint(ALLOC_IN_RC(ptr_reg));
 5989   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5990   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5991 
 5992   op_cost(10);
 5993   format %{"[$reg + $off + $idx]" %}
 5994   interface(MEMORY_INTER) %{
 5995     base($reg);
 5996     index($idx);
 5997     scale(0x0);
 5998     disp($off);
 5999   %}
 6000 %}
 6001 
 6002 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 6003 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 6004 %{
 6005   constraint(ALLOC_IN_RC(ptr_reg));
 6006   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 6007   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 6008 
 6009   op_cost(10);
 6010   format %{"[$reg + $off + $idx << $scale]" %}
 6011   interface(MEMORY_INTER) %{
 6012     base($reg);
 6013     index($idx);
 6014     scale($scale);
 6015     disp($off);
 6016   %}
 6017 %}
 6018 
 6019 //----------Special Memory Operands--------------------------------------------
 6020 // Stack Slot Operand - This operand is used for loading and storing temporary
 6021 //                      values on the stack where a match requires a value to
 6022 //                      flow through memory.
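//
// For example, bit-preserving moves between XMM and integer registers (such as
// the MoveF2I family elsewhere in this file) are matched with stackSlot
// operands so the value travels through an [RSP + disp] slot.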
 6023 operand stackSlotP(sRegP reg)
 6024 %{
 6025   constraint(ALLOC_IN_RC(stack_slots));
 6026   // No match rule because this operand is only generated in matching
 6027 
 6028   format %{ "[$reg]" %}
 6029   interface(MEMORY_INTER) %{
 6030     base(0x4);   // RSP
 6031     index(0x4);  // No Index
 6032     scale(0x0);  // No Scale
 6033     disp($reg);  // Stack Offset
 6034   %}
 6035 %}
 6036 
 6037 operand stackSlotI(sRegI reg)
 6038 %{
 6039   constraint(ALLOC_IN_RC(stack_slots));
 6040   // No match rule because this operand is only generated in matching
 6041 
 6042   format %{ "[$reg]" %}
 6043   interface(MEMORY_INTER) %{
 6044     base(0x4);   // RSP
 6045     index(0x4);  // No Index
 6046     scale(0x0);  // No Scale
 6047     disp($reg);  // Stack Offset
 6048   %}
 6049 %}
 6050 
 6051 operand stackSlotF(sRegF reg)
 6052 %{
 6053   constraint(ALLOC_IN_RC(stack_slots));
 6054   // No match rule because this operand is only generated in matching
 6055 
 6056   format %{ "[$reg]" %}
 6057   interface(MEMORY_INTER) %{
 6058     base(0x4);   // RSP
 6059     index(0x4);  // No Index
 6060     scale(0x0);  // No Scale
 6061     disp($reg);  // Stack Offset
 6062   %}
 6063 %}
 6064 
 6065 operand stackSlotD(sRegD reg)
 6066 %{
 6067   constraint(ALLOC_IN_RC(stack_slots));
 6068   // No match rule because this operand is only generated in matching
 6069 
 6070   format %{ "[$reg]" %}
 6071   interface(MEMORY_INTER) %{
 6072     base(0x4);   // RSP
 6073     index(0x4);  // No Index
 6074     scale(0x0);  // No Scale
 6075     disp($reg);  // Stack Offset
 6076   %}
 6077 %}
 6078 operand stackSlotL(sRegL reg)
 6079 %{
 6080   constraint(ALLOC_IN_RC(stack_slots));
 6081   // No match rule because this operand is only generated in matching
 6082 
 6083   format %{ "[$reg]" %}
 6084   interface(MEMORY_INTER) %{
 6085     base(0x4);   // RSP
 6086     index(0x4);  // No Index
 6087     scale(0x0);  // No Scale
 6088     disp($reg);  // Stack Offset
 6089   %}
 6090 %}
 6091 
 6092 //----------Conditional Branch Operands----------------------------------------
 6093 // Comparison Op  - This is the operation of the comparison, and is limited to
 6094 //                  the following set of codes:
 6095 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6096 //
 6097 // Other attributes of the comparison, such as unsignedness, are specified
 6098 // by the comparison instruction that sets a condition code flags register.
 6099 // That result is represented by a flags operand whose subtype is appropriate
 6100 // to the unsignedness (etc.) of the comparison.
 6101 //
 6102 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6103 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6104 // by matching a specific subtype of Bool operand below, such as cmpOpU.
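//
// Sketch of the flow (the real compare and branch rules appear later in this
// file):
//   CmpI/CmpP/...  -> produces an rFlagsReg result  (selects the compare insn)
//   Bool           -> matched as cmpOp/cmpOpU/...   (selects the jcc encoding)
//   If             -> matched by a jump rule taking (cmpOp cop, rFlagsReg cr)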
 6105 
 6106 // Comparison Code
 6107 operand cmpOp()
 6108 %{
 6109   match(Bool);
 6110 
 6111   format %{ "" %}
 6112   interface(COND_INTER) %{
 6113     equal(0x4, "e");
 6114     not_equal(0x5, "ne");
 6115     less(0xc, "l");
 6116     greater_equal(0xd, "ge");
 6117     less_equal(0xe, "le");
 6118     greater(0xf, "g");
 6119     overflow(0x0, "o");
 6120     no_overflow(0x1, "no");
 6121   %}
 6122 %}
 6123 
 6124 // Comparison Code, unsigned compare.  Used by FP also, with
 6125 // C2 (unordered) turned into GT or LT already.  The other bits
 6126 // C0 and C3 are turned into Carry & Zero flags.
 6127 operand cmpOpU()
 6128 %{
 6129   match(Bool);
 6130 
 6131   format %{ "" %}
 6132   interface(COND_INTER) %{
 6133     equal(0x4, "e");
 6134     not_equal(0x5, "ne");
 6135     less(0x2, "b");
 6136     greater_equal(0x3, "ae");
 6137     less_equal(0x6, "be");
 6138     greater(0x7, "a");
 6139     overflow(0x0, "o");
 6140     no_overflow(0x1, "no");
 6141   %}
 6142 %}
 6143 
 6144 
 6145 // Floating comparisons that don't require any fixup for the unordered case.
 6146 // If both inputs of the comparison are the same, ZF is always set, so we
 6147 // don't need to use cmpOpUCF2 for eq/ne.
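// (For instance, ucomiss/ucomisd of a register against itself always sets ZF
// and reports "unordered" only through PF, which is why eq/ne on identical
// inputs can use the np/p encodings below without a fixup branch.)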
 6148 operand cmpOpUCF() %{
 6149   match(Bool);
 6150   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6151             (n->as_Bool()->_test._test == BoolTest::lt ||
 6152              n->as_Bool()->_test._test == BoolTest::ge ||
 6153              n->as_Bool()->_test._test == BoolTest::le ||
 6154              n->as_Bool()->_test._test == BoolTest::gt ||
 6155              n->in(1)->in(1) == n->in(1)->in(2)));
 6156   format %{ "" %}
 6157   interface(COND_INTER) %{
 6158     equal(0xb, "np");
 6159     not_equal(0xa, "p");
 6160     less(0x2, "b");
 6161     greater_equal(0x3, "ae");
 6162     less_equal(0x6, "be");
 6163     greater(0x7, "a");
 6164     overflow(0x0, "o");
 6165     no_overflow(0x1, "no");
 6166   %}
 6167 %}
 6168 
 6169 
 6170 // Floating comparisons that can be fixed up with extra conditional jumps
 6171 operand cmpOpUCF2() %{
 6172   match(Bool);
 6173   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6174             (n->as_Bool()->_test._test == BoolTest::ne ||
 6175              n->as_Bool()->_test._test == BoolTest::eq) &&
 6176             n->in(1)->in(1) != n->in(1)->in(2));
 6177   format %{ "" %}
 6178   interface(COND_INTER) %{
 6179     equal(0x4, "e");
 6180     not_equal(0x5, "ne");
 6181     less(0x2, "b");
 6182     greater_equal(0x3, "ae");
 6183     less_equal(0x6, "be");
 6184     greater(0x7, "a");
 6185     overflow(0x0, "o");
 6186     no_overflow(0x1, "no");
 6187   %}
 6188 %}
 6189 
 6190 
 6191 // Floating point comparisons that set condition flags to test more directly.
 6192 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
 6193 // are used for L (<) and LE (<=) conditions. It's important to convert these
 6194 // latter conditions to ones that use unsigned tests before passing them to an
 6195 // instruction, because the preceding comparison might be based on a three-way
 6196 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
 6197 operand cmpOpUCFE()
 6198 %{
 6199   match(Bool);
 6200   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6201             (n->as_Bool()->_test._test == BoolTest::ne ||
 6202              n->as_Bool()->_test._test == BoolTest::eq ||
 6203              n->as_Bool()->_test._test == BoolTest::lt ||
 6204              n->as_Bool()->_test._test == BoolTest::ge ||
 6205              n->as_Bool()->_test._test == BoolTest::le ||
 6206              n->as_Bool()->_test._test == BoolTest::gt));
 6207 
 6208   format %{ "" %}
 6209   interface(COND_INTER) %{
 6210     equal(0x4, "e");
 6211     not_equal(0x5, "ne");
 6212     less(0x2, "b");
 6213     greater_equal(0x3, "ae");
 6214     less_equal(0x6, "be");
 6215     greater(0x7, "a");
 6216     overflow(0x0, "o");
 6217     no_overflow(0x1, "no");
 6218   %}
 6219 %}
 6220 
 6221 // Operands for bound floating point register arguments
 6222 operand rxmm0() %{
 6223   constraint(ALLOC_IN_RC(xmm0_reg));
 6224   match(VecX);
 6225   format%{%}
 6226   interface(REG_INTER);
 6227 %}
 6228 
 6229 // Vectors
 6230 
 6231 // Dummy generic vector class. Should be used for all vector operands.
 6232 // Replaced with vec[SDXYZ] during post-selection pass.
 6233 operand vec() %{
 6234   constraint(ALLOC_IN_RC(dynamic));
 6235   match(VecX);
 6236   match(VecY);
 6237   match(VecZ);
 6238   match(VecS);
 6239   match(VecD);
 6240 
 6241   format %{ %}
 6242   interface(REG_INTER);
 6243 %}
 6244 
 6245 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6246 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6247 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6248 // runtime code generation via reg_class_dynamic.
 6249 operand legVec() %{
 6250   constraint(ALLOC_IN_RC(dynamic));
 6251   match(VecX);
 6252   match(VecY);
 6253   match(VecZ);
 6254   match(VecS);
 6255   match(VecD);
 6256 
 6257   format %{ %}
 6258   interface(REG_INTER);
 6259 %}
 6260 
 6261 // Replaces vec during post-selection cleanup. See above.
 6262 operand vecS() %{
 6263   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6264   match(VecS);
 6265 
 6266   format %{ %}
 6267   interface(REG_INTER);
 6268 %}
 6269 
 6270 // Replaces legVec during post-selection cleanup. See above.
 6271 operand legVecS() %{
 6272   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6273   match(VecS);
 6274 
 6275   format %{ %}
 6276   interface(REG_INTER);
 6277 %}
 6278 
 6279 // Replaces vec during post-selection cleanup. See above.
 6280 operand vecD() %{
 6281   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6282   match(VecD);
 6283 
 6284   format %{ %}
 6285   interface(REG_INTER);
 6286 %}
 6287 
 6288 // Replaces legVec during post-selection cleanup. See above.
 6289 operand legVecD() %{
 6290   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6291   match(VecD);
 6292 
 6293   format %{ %}
 6294   interface(REG_INTER);
 6295 %}
 6296 
 6297 // Replaces vec during post-selection cleanup. See above.
 6298 operand vecX() %{
 6299   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6300   match(VecX);
 6301 
 6302   format %{ %}
 6303   interface(REG_INTER);
 6304 %}
 6305 
 6306 // Replaces legVec during post-selection cleanup. See above.
 6307 operand legVecX() %{
 6308   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6309   match(VecX);
 6310 
 6311   format %{ %}
 6312   interface(REG_INTER);
 6313 %}
 6314 
 6315 // Replaces vec during post-selection cleanup. See above.
 6316 operand vecY() %{
 6317   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6318   match(VecY);
 6319 
 6320   format %{ %}
 6321   interface(REG_INTER);
 6322 %}
 6323 
 6324 // Replaces legVec during post-selection cleanup. See above.
 6325 operand legVecY() %{
 6326   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6327   match(VecY);
 6328 
 6329   format %{ %}
 6330   interface(REG_INTER);
 6331 %}
 6332 
 6333 // Replaces vec during post-selection cleanup. See above.
 6334 operand vecZ() %{
 6335   constraint(ALLOC_IN_RC(vectorz_reg));
 6336   match(VecZ);
 6337 
 6338   format %{ %}
 6339   interface(REG_INTER);
 6340 %}
 6341 
 6342 // Replaces legVec during post-selection cleanup. See above.
 6343 operand legVecZ() %{
 6344   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6345   match(VecZ);
 6346 
 6347   format %{ %}
 6348   interface(REG_INTER);
 6349 %}
 6350 
 6351 //----------OPERAND CLASSES----------------------------------------------------
 6352 // Operand Classes are groups of operands that are used to simplify
 6353 // instruction definitions by not requiring the AD writer to specify separate
 6354 // instructions for every form of operand when the instruction accepts
 6355 // multiple operand types with the same basic encoding and format.  The classic
 6356 // case of this is memory operands.
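//
// For example, the "memory" opclass declared below lets a single rule such as
// loadI(rRegI dst, memory mem) cover every supported addressing form
// ([reg], [reg + off], [reg + idx << scale + off], the narrow-oop variants,
// and so on) instead of requiring one instruct per memory operand.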
 6357 
 6358 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6359                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6360                indCompressedOop, indCompressedOopOffset,
 6361                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6362                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6363                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6364 
 6365 //----------PIPELINE-----------------------------------------------------------
 6366 // Rules which define the behavior of the target architecture's pipeline.
 6367 pipeline %{
 6368 
 6369 //----------ATTRIBUTES---------------------------------------------------------
 6370 attributes %{
 6371   variable_size_instructions;        // Instructions are of variable size
 6372   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6373   instruction_unit_size = 1;         // An instruction is 1 byte long
 6374   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6375   instruction_fetch_units = 1;       // of 16 bytes
 6376 %}
 6377 
 6378 //----------RESOURCES----------------------------------------------------------
 6379 // Resources are the functional units available to the machine
 6380 
 6381 // Generic P2/P3 pipeline
 6382 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6383 // 3 instructions decoded per cycle.
 6384 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 6385 // 3 ALU ops, only ALU0 handles mul instructions.
 6386 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6387            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6388            BR, FPU,
 6389            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6390 
 6391 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6392 // Pipeline Description specifies the stages in the machine's pipeline
 6393 
 6394 // Generic P2/P3 pipeline
 6395 pipe_desc(S0, S1, S2, S3, S4, S5);
 6396 
 6397 //----------PIPELINE CLASSES---------------------------------------------------
 6398 // Pipeline Classes describe the stages in which input and output are
 6399 // referenced by the hardware pipeline.
 6400 
 6401 // Naming convention: ialu or fpu
 6402 // Then: _reg
 6403 // Then: _reg if there is a 2nd register
 6404 // Then: _long if it's a pair of instructions implementing a long
 6405 // Then: _fat if it requires the big decoder
 6406 //   Or: _mem if it requires the big decoder and a memory unit.
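//
// Reading an example name: ialu_reg_mem is an integer ALU operation with a
// register destination and a memory source, so its definition below requires
// the big decoder (D0) and a memory unit in addition to an ALU.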
 6407 
 6408 // Integer ALU reg operation
 6409 pipe_class ialu_reg(rRegI dst)
 6410 %{
 6411     single_instruction;
 6412     dst    : S4(write);
 6413     dst    : S3(read);
 6414     DECODE : S0;        // any decoder
 6415     ALU    : S3;        // any alu
 6416 %}
 6417 
 6418 // Long ALU reg operation
 6419 pipe_class ialu_reg_long(rRegL dst)
 6420 %{
 6421     instruction_count(2);
 6422     dst    : S4(write);
 6423     dst    : S3(read);
 6424     DECODE : S0(2);     // any 2 decoders
 6425     ALU    : S3(2);     // both alus
 6426 %}
 6427 
 6428 // Integer ALU reg operation using big decoder
 6429 pipe_class ialu_reg_fat(rRegI dst)
 6430 %{
 6431     single_instruction;
 6432     dst    : S4(write);
 6433     dst    : S3(read);
 6434     D0     : S0;        // big decoder only
 6435     ALU    : S3;        // any alu
 6436 %}
 6437 
 6438 // Integer ALU reg-reg operation
 6439 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6440 %{
 6441     single_instruction;
 6442     dst    : S4(write);
 6443     src    : S3(read);
 6444     DECODE : S0;        // any decoder
 6445     ALU    : S3;        // any alu
 6446 %}
 6447 
 6448 // Integer ALU reg-reg operation
 6449 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6450 %{
 6451     single_instruction;
 6452     dst    : S4(write);
 6453     src    : S3(read);
 6454     D0     : S0;        // big decoder only
 6455     ALU    : S3;        // any alu
 6456 %}
 6457 
 6458 // Integer ALU reg-mem operation
 6459 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6460 %{
 6461     single_instruction;
 6462     dst    : S5(write);
 6463     mem    : S3(read);
 6464     D0     : S0;        // big decoder only
 6465     ALU    : S4;        // any alu
 6466     MEM    : S3;        // any mem
 6467 %}
 6468 
 6469 // Integer mem operation (prefetch)
 6470 pipe_class ialu_mem(memory mem)
 6471 %{
 6472     single_instruction;
 6473     mem    : S3(read);
 6474     D0     : S0;        // big decoder only
 6475     MEM    : S3;        // any mem
 6476 %}
 6477 
 6478 // Integer Store to Memory
 6479 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6480 %{
 6481     single_instruction;
 6482     mem    : S3(read);
 6483     src    : S5(read);
 6484     D0     : S0;        // big decoder only
 6485     ALU    : S4;        // any alu
 6486     MEM    : S3;
 6487 %}
 6488 
 6489 // // Long Store to Memory
 6490 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6491 // %{
 6492 //     instruction_count(2);
 6493 //     mem    : S3(read);
 6494 //     src    : S5(read);
 6495 //     D0     : S0(2);          // big decoder only; twice
 6496 //     ALU    : S4(2);     // any 2 alus
 6497 //     MEM    : S3(2);  // Both mems
 6498 // %}
 6499 
 6500 // Integer Store to Memory
 6501 pipe_class ialu_mem_imm(memory mem)
 6502 %{
 6503     single_instruction;
 6504     mem    : S3(read);
 6505     D0     : S0;        // big decoder only
 6506     ALU    : S4;        // any alu
 6507     MEM    : S3;
 6508 %}
 6509 
 6510 // Integer ALU0 reg-reg operation
 6511 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6512 %{
 6513     single_instruction;
 6514     dst    : S4(write);
 6515     src    : S3(read);
 6516     D0     : S0;        // Big decoder only
 6517     ALU0   : S3;        // only alu0
 6518 %}
 6519 
 6520 // Integer ALU0 reg-mem operation
 6521 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6522 %{
 6523     single_instruction;
 6524     dst    : S5(write);
 6525     mem    : S3(read);
 6526     D0     : S0;        // big decoder only
 6527     ALU0   : S4;        // ALU0 only
 6528     MEM    : S3;        // any mem
 6529 %}
 6530 
 6531 // Integer ALU reg-reg operation
 6532 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6533 %{
 6534     single_instruction;
 6535     cr     : S4(write);
 6536     src1   : S3(read);
 6537     src2   : S3(read);
 6538     DECODE : S0;        // any decoder
 6539     ALU    : S3;        // any alu
 6540 %}
 6541 
 6542 // Integer ALU reg-imm operation
 6543 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6544 %{
 6545     single_instruction;
 6546     cr     : S4(write);
 6547     src1   : S3(read);
 6548     DECODE : S0;        // any decoder
 6549     ALU    : S3;        // any alu
 6550 %}
 6551 
 6552 // Integer ALU reg-mem operation
 6553 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6554 %{
 6555     single_instruction;
 6556     cr     : S4(write);
 6557     src1   : S3(read);
 6558     src2   : S3(read);
 6559     D0     : S0;        // big decoder only
 6560     ALU    : S4;        // any alu
 6561     MEM    : S3;
 6562 %}
 6563 
 6564 // Conditional move reg-reg
 6565 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6566 %{
 6567     instruction_count(4);
 6568     y      : S4(read);
 6569     q      : S3(read);
 6570     p      : S3(read);
 6571     DECODE : S0(4);     // any decoder
 6572 %}
 6573 
 6574 // Conditional move reg-reg
 6575 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6576 %{
 6577     single_instruction;
 6578     dst    : S4(write);
 6579     src    : S3(read);
 6580     cr     : S3(read);
 6581     DECODE : S0;        // any decoder
 6582 %}
 6583 
 6584 // Conditional move reg-mem
 6585 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6586 %{
 6587     single_instruction;
 6588     dst    : S4(write);
 6589     src    : S3(read);
 6590     cr     : S3(read);
 6591     DECODE : S0;        // any decoder
 6592     MEM    : S3;
 6593 %}
 6594 
 6595 // Conditional move reg-reg long
 6596 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6597 %{
 6598     single_instruction;
 6599     dst    : S4(write);
 6600     src    : S3(read);
 6601     cr     : S3(read);
 6602     DECODE : S0(2);     // any 2 decoders
 6603 %}
 6604 
 6605 // Float reg-reg operation
 6606 pipe_class fpu_reg(regD dst)
 6607 %{
 6608     instruction_count(2);
 6609     dst    : S3(read);
 6610     DECODE : S0(2);     // any 2 decoders
 6611     FPU    : S3;
 6612 %}
 6613 
 6614 // Float reg-reg operation
 6615 pipe_class fpu_reg_reg(regD dst, regD src)
 6616 %{
 6617     instruction_count(2);
 6618     dst    : S4(write);
 6619     src    : S3(read);
 6620     DECODE : S0(2);     // any 2 decoders
 6621     FPU    : S3;
 6622 %}
 6623 
 6624 // Float reg-reg operation
 6625 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6626 %{
 6627     instruction_count(3);
 6628     dst    : S4(write);
 6629     src1   : S3(read);
 6630     src2   : S3(read);
 6631     DECODE : S0(3);     // any 3 decoders
 6632     FPU    : S3(2);
 6633 %}
 6634 
 6635 // Float reg-reg operation
 6636 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6637 %{
 6638     instruction_count(4);
 6639     dst    : S4(write);
 6640     src1   : S3(read);
 6641     src2   : S3(read);
 6642     src3   : S3(read);
 6643     DECODE : S0(4);     // any 4 decoders
 6644     FPU    : S3(2);
 6645 %}
 6646 
 6647 // Float reg-reg operation
 6648 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6649 %{
 6650     instruction_count(4);
 6651     dst    : S4(write);
 6652     src1   : S3(read);
 6653     src2   : S3(read);
 6654     src3   : S3(read);
 6655     DECODE : S1(3);     // any 3 decoders
 6656     D0     : S0;        // Big decoder only
 6657     FPU    : S3(2);
 6658     MEM    : S3;
 6659 %}
 6660 
 6661 // Float reg-mem operation
 6662 pipe_class fpu_reg_mem(regD dst, memory mem)
 6663 %{
 6664     instruction_count(2);
 6665     dst    : S5(write);
 6666     mem    : S3(read);
 6667     D0     : S0;        // big decoder only
 6668     DECODE : S1;        // any decoder for FPU POP
 6669     FPU    : S4;
 6670     MEM    : S3;        // any mem
 6671 %}
 6672 
 6673 // Float reg-mem operation
 6674 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6675 %{
 6676     instruction_count(3);
 6677     dst    : S5(write);
 6678     src1   : S3(read);
 6679     mem    : S3(read);
 6680     D0     : S0;        // big decoder only
 6681     DECODE : S1(2);     // any decoder for FPU POP
 6682     FPU    : S4;
 6683     MEM    : S3;        // any mem
 6684 %}
 6685 
 6686 // Float mem-reg operation
 6687 pipe_class fpu_mem_reg(memory mem, regD src)
 6688 %{
 6689     instruction_count(2);
 6690     src    : S5(read);
 6691     mem    : S3(read);
 6692     DECODE : S0;        // any decoder for FPU PUSH
 6693     D0     : S1;        // big decoder only
 6694     FPU    : S4;
 6695     MEM    : S3;        // any mem
 6696 %}
 6697 
 6698 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6699 %{
 6700     instruction_count(3);
 6701     src1   : S3(read);
 6702     src2   : S3(read);
 6703     mem    : S3(read);
 6704     DECODE : S0(2);     // any decoder for FPU PUSH
 6705     D0     : S1;        // big decoder only
 6706     FPU    : S4;
 6707     MEM    : S3;        // any mem
 6708 %}
 6709 
 6710 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6711 %{
 6712     instruction_count(3);
 6713     src1   : S3(read);
 6714     src2   : S3(read);
 6715     mem    : S4(read);
 6716     DECODE : S0;        // any decoder for FPU PUSH
 6717     D0     : S0(2);     // big decoder only
 6718     FPU    : S4;
 6719     MEM    : S3(2);     // any mem
 6720 %}
 6721 
 6722 pipe_class fpu_mem_mem(memory dst, memory src1)
 6723 %{
 6724     instruction_count(2);
 6725     src1   : S3(read);
 6726     dst    : S4(read);
 6727     D0     : S0(2);     // big decoder only
 6728     MEM    : S3(2);     // any mem
 6729 %}
 6730 
 6731 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6732 %{
 6733     instruction_count(3);
 6734     src1   : S3(read);
 6735     src2   : S3(read);
 6736     dst    : S4(read);
 6737     D0     : S0(3);     // big decoder only
 6738     FPU    : S4;
 6739     MEM    : S3(3);     // any mem
 6740 %}
 6741 
 6742 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6743 %{
 6744     instruction_count(3);
 6745     src1   : S4(read);
 6746     mem    : S4(read);
 6747     DECODE : S0;        // any decoder for FPU PUSH
 6748     D0     : S0(2);     // big decoder only
 6749     FPU    : S4;
 6750     MEM    : S3(2);     // any mem
 6751 %}
 6752 
 6753 // Float load constant
 6754 pipe_class fpu_reg_con(regD dst)
 6755 %{
 6756     instruction_count(2);
 6757     dst    : S5(write);
 6758     D0     : S0;        // big decoder only for the load
 6759     DECODE : S1;        // any decoder for FPU POP
 6760     FPU    : S4;
 6761     MEM    : S3;        // any mem
 6762 %}
 6763 
 6764 // Float load constant
 6765 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6766 %{
 6767     instruction_count(3);
 6768     dst    : S5(write);
 6769     src    : S3(read);
 6770     D0     : S0;        // big decoder only for the load
 6771     DECODE : S1(2);     // any decoder for FPU POP
 6772     FPU    : S4;
 6773     MEM    : S3;        // any mem
 6774 %}
 6775 
 6776 // Unconditional branch
 6777 pipe_class pipe_jmp(label labl)
 6778 %{
 6779     single_instruction;
 6780     BR   : S3;
 6781 %}
 6782 
 6783 // Conditional branch
 6784 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6785 %{
 6786     single_instruction;
 6787     cr    : S1(read);
 6788     BR    : S3;
 6789 %}
 6790 
 6791 // Allocation idiom
 6792 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6793 %{
 6794     instruction_count(1); force_serialization;
 6795     fixed_latency(6);
 6796     heap_ptr : S3(read);
 6797     DECODE   : S0(3);
 6798     D0       : S2;
 6799     MEM      : S3;
 6800     ALU      : S3(2);
 6801     dst      : S5(write);
 6802     BR       : S5;
 6803 %}
 6804 
 6805 // Generic big/slow expanded idiom
 6806 pipe_class pipe_slow()
 6807 %{
 6808     instruction_count(10); multiple_bundles; force_serialization;
 6809     fixed_latency(100);
 6810     D0  : S0(2);
 6811     MEM : S3(2);
 6812 %}
 6813 
 6814 // The real do-nothing guy
 6815 pipe_class empty()
 6816 %{
 6817     instruction_count(0);
 6818 %}
 6819 
 6820 // Define the class for the Nop node
 6821 define
 6822 %{
 6823    MachNop = empty;
 6824 %}
 6825 
 6826 %}
 6827 
 6828 //----------INSTRUCTIONS-------------------------------------------------------
 6829 //
 6830 // match      -- States which machine-independent subtree may be replaced
 6831 //               by this instruction.
 6832 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6833 //               selection to identify a minimum cost tree of machine
 6834 //               instructions that matches a tree of machine-independent
 6835 //               instructions.
 6836 // format     -- A string providing the disassembly for this instruction.
 6837 //               The value of an instruction's operand may be inserted
 6838 //               by referring to it with a '$' prefix.
 6839 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6840 //               to within an encode class as $primary, $secondary, and $tertiary
 6841 //               respectively.  The primary opcode is commonly used to
 6842 //               indicate the type of machine instruction, while secondary
 6843 //               and tertiary are often used for prefix options or addressing
 6844 //               modes.
 6845 // ins_encode -- A list of encode classes with parameters. The encode class
 6846 //               name must have been defined in an 'enc_class' specification
 6847 //               in the encode section of the architecture description.
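//
// A minimal skeleton showing how these pieces fit together (for orientation
// only; it is not an additional rule, and "LoadX" stands in for a real ideal
// node type; see loadI below for a genuine example):
//
//   instruct loadX(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadX mem));        // ideal subtree replaced by this rule
//     ins_cost(125);                     // relative cost used during selection
//     format %{ "movl    $dst, $mem" %}  // disassembly string, $-substituted
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);            // pipeline class defined above
//   %}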
 6848 
 6849 // ============================================================================
 6850 
 6851 instruct ShouldNotReachHere() %{
 6852   match(Halt);
 6853   format %{ "stop\t# ShouldNotReachHere" %}
 6854   ins_encode %{
 6855     if (is_reachable()) {
 6856       const char* str = __ code_string(_halt_reason);
 6857       __ stop(str);
 6858     }
 6859   %}
 6860   ins_pipe(pipe_slow);
 6861 %}
 6862 
 6863 // ============================================================================
 6864 
 6865 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6866 // Load Float
 6867 instruct MoveF2VL(vlRegF dst, regF src) %{
 6868   match(Set dst src);
 6869   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6870   ins_encode %{
 6871     ShouldNotReachHere();
 6872   %}
 6873   ins_pipe( fpu_reg_reg );
 6874 %}
 6875 
 6876 // Load Float
 6877 instruct MoveF2LEG(legRegF dst, regF src) %{
 6878   match(Set dst src);
 6879   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6880   ins_encode %{
 6881     ShouldNotReachHere();
 6882   %}
 6883   ins_pipe( fpu_reg_reg );
 6884 %}
 6885 
 6886 // Load Float
 6887 instruct MoveVL2F(regF dst, vlRegF src) %{
 6888   match(Set dst src);
 6889   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6890   ins_encode %{
 6891     ShouldNotReachHere();
 6892   %}
 6893   ins_pipe( fpu_reg_reg );
 6894 %}
 6895 
 6896 // Load Float
 6897 instruct MoveLEG2F(regF dst, legRegF src) %{
 6898   match(Set dst src);
 6899   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6900   ins_encode %{
 6901     ShouldNotReachHere();
 6902   %}
 6903   ins_pipe( fpu_reg_reg );
 6904 %}
 6905 
 6906 // Load Double
 6907 instruct MoveD2VL(vlRegD dst, regD src) %{
 6908   match(Set dst src);
 6909   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6910   ins_encode %{
 6911     ShouldNotReachHere();
 6912   %}
 6913   ins_pipe( fpu_reg_reg );
 6914 %}
 6915 
 6916 // Load Double
 6917 instruct MoveD2LEG(legRegD dst, regD src) %{
 6918   match(Set dst src);
 6919   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6920   ins_encode %{
 6921     ShouldNotReachHere();
 6922   %}
 6923   ins_pipe( fpu_reg_reg );
 6924 %}
 6925 
 6926 // Load Double
 6927 instruct MoveVL2D(regD dst, vlRegD src) %{
 6928   match(Set dst src);
 6929   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6930   ins_encode %{
 6931     ShouldNotReachHere();
 6932   %}
 6933   ins_pipe( fpu_reg_reg );
 6934 %}
 6935 
 6936 // Load Double
 6937 instruct MoveLEG2D(regD dst, legRegD src) %{
 6938   match(Set dst src);
 6939   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6940   ins_encode %{
 6941     ShouldNotReachHere();
 6942   %}
 6943   ins_pipe( fpu_reg_reg );
 6944 %}
 6945 
 6946 //----------Load/Store/Move Instructions---------------------------------------
 6947 //----------Load Instructions--------------------------------------------------
 6948 
 6949 // Load Byte (8 bit signed)
 6950 instruct loadB(rRegI dst, memory mem)
 6951 %{
 6952   match(Set dst (LoadB mem));
 6953 
 6954   ins_cost(125);
 6955   format %{ "movsbl  $dst, $mem\t# byte" %}
 6956 
 6957   ins_encode %{
 6958     __ movsbl($dst$$Register, $mem$$Address);
 6959   %}
 6960 
 6961   ins_pipe(ialu_reg_mem);
 6962 %}
 6963 
 6964 // Load Byte (8 bit signed) into Long Register
 6965 instruct loadB2L(rRegL dst, memory mem)
 6966 %{
 6967   match(Set dst (ConvI2L (LoadB mem)));
 6968 
 6969   ins_cost(125);
 6970   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6971 
 6972   ins_encode %{
 6973     __ movsbq($dst$$Register, $mem$$Address);
 6974   %}
 6975 
 6976   ins_pipe(ialu_reg_mem);
 6977 %}
 6978 
 6979 // Load Unsigned Byte (8 bit UNsigned)
 6980 instruct loadUB(rRegI dst, memory mem)
 6981 %{
 6982   match(Set dst (LoadUB mem));
 6983 
 6984   ins_cost(125);
 6985   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6986 
 6987   ins_encode %{
 6988     __ movzbl($dst$$Register, $mem$$Address);
 6989   %}
 6990 
 6991   ins_pipe(ialu_reg_mem);
 6992 %}
 6993 
 6994 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6995 instruct loadUB2L(rRegL dst, memory mem)
 6996 %{
 6997   match(Set dst (ConvI2L (LoadUB mem)));
 6998 
 6999   ins_cost(125);
 7000   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 7001 
 7002   ins_encode %{
 7003     __ movzbq($dst$$Register, $mem$$Address);
 7004   %}
 7005 
 7006   ins_pipe(ialu_reg_mem);
 7007 %}
 7008 
 7009 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 7010 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7011   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 7012   effect(KILL cr);
 7013 
 7014   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 7015             "andl    $dst, right_n_bits($mask, 8)" %}
 7016   ins_encode %{
 7017     Register Rdst = $dst$$Register;
 7018     __ movzbq(Rdst, $mem$$Address);
 7019     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 7020   %}
 7021   ins_pipe(ialu_reg_mem);
 7022 %}
 7023 
 7024 // Load Short (16 bit signed)
 7025 instruct loadS(rRegI dst, memory mem)
 7026 %{
 7027   match(Set dst (LoadS mem));
 7028 
 7029   ins_cost(125);
 7030   format %{ "movswl $dst, $mem\t# short" %}
 7031 
 7032   ins_encode %{
 7033     __ movswl($dst$$Register, $mem$$Address);
 7034   %}
 7035 
 7036   ins_pipe(ialu_reg_mem);
 7037 %}
 7038 
 7039 // Load Short (16 bit signed) to Byte (8 bit signed)
 7040 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7041   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 7042 
 7043   ins_cost(125);
 7044   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 7045   ins_encode %{
 7046     __ movsbl($dst$$Register, $mem$$Address);
 7047   %}
 7048   ins_pipe(ialu_reg_mem);
 7049 %}
 7050 
 7051 // Load Short (16 bit signed) into Long Register
 7052 instruct loadS2L(rRegL dst, memory mem)
 7053 %{
 7054   match(Set dst (ConvI2L (LoadS mem)));
 7055 
 7056   ins_cost(125);
 7057   format %{ "movswq $dst, $mem\t# short -> long" %}
 7058 
 7059   ins_encode %{
 7060     __ movswq($dst$$Register, $mem$$Address);
 7061   %}
 7062 
 7063   ins_pipe(ialu_reg_mem);
 7064 %}
 7065 
 7066 // Load Unsigned Short/Char (16 bit UNsigned)
 7067 instruct loadUS(rRegI dst, memory mem)
 7068 %{
 7069   match(Set dst (LoadUS mem));
 7070 
 7071   ins_cost(125);
 7072   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7073 
 7074   ins_encode %{
 7075     __ movzwl($dst$$Register, $mem$$Address);
 7076   %}
 7077 
 7078   ins_pipe(ialu_reg_mem);
 7079 %}
 7080 
 7081 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7082 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7083   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7084 
 7085   ins_cost(125);
 7086   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7087   ins_encode %{
 7088     __ movsbl($dst$$Register, $mem$$Address);
 7089   %}
 7090   ins_pipe(ialu_reg_mem);
 7091 %}
 7092 
 7093 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7094 instruct loadUS2L(rRegL dst, memory mem)
 7095 %{
 7096   match(Set dst (ConvI2L (LoadUS mem)));
 7097 
 7098   ins_cost(125);
 7099   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7100 
 7101   ins_encode %{
 7102     __ movzwq($dst$$Register, $mem$$Address);
 7103   %}
 7104 
 7105   ins_pipe(ialu_reg_mem);
 7106 %}
 7107 
 7108 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7109 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7110   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7111 
 7112   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7113   ins_encode %{
 7114     __ movzbq($dst$$Register, $mem$$Address);
 7115   %}
 7116   ins_pipe(ialu_reg_mem);
 7117 %}
 7118 
 7119 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7120 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7121   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7122   effect(KILL cr);
 7123 
 7124   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7125             "andl    $dst, right_n_bits($mask, 16)" %}
 7126   ins_encode %{
 7127     Register Rdst = $dst$$Register;
 7128     __ movzwq(Rdst, $mem$$Address);
 7129     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7130   %}
 7131   ins_pipe(ialu_reg_mem);
 7132 %}
 7133 
 7134 // Load Integer
 7135 instruct loadI(rRegI dst, memory mem)
 7136 %{
 7137   match(Set dst (LoadI mem));
 7138 
 7139   ins_cost(125);
 7140   format %{ "movl    $dst, $mem\t# int" %}
 7141 
 7142   ins_encode %{
 7143     __ movl($dst$$Register, $mem$$Address);
 7144   %}
 7145 
 7146   ins_pipe(ialu_reg_mem);
 7147 %}
 7148 
 7149 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7150 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7151   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7152 
 7153   ins_cost(125);
 7154   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7155   ins_encode %{
 7156     __ movsbl($dst$$Register, $mem$$Address);
 7157   %}
 7158   ins_pipe(ialu_reg_mem);
 7159 %}
 7160 
 7161 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7162 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7163   match(Set dst (AndI (LoadI mem) mask));
 7164 
 7165   ins_cost(125);
 7166   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7167   ins_encode %{
 7168     __ movzbl($dst$$Register, $mem$$Address);
 7169   %}
 7170   ins_pipe(ialu_reg_mem);
 7171 %}
 7172 
 7173 // Load Integer (32 bit signed) to Short (16 bit signed)
 7174 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7175   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7176 
 7177   ins_cost(125);
 7178   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7179   ins_encode %{
 7180     __ movswl($dst$$Register, $mem$$Address);
 7181   %}
 7182   ins_pipe(ialu_reg_mem);
 7183 %}
 7184 
 7185 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7186 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7187   match(Set dst (AndI (LoadI mem) mask));
 7188 
 7189   ins_cost(125);
 7190   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7191   ins_encode %{
 7192     __ movzwl($dst$$Register, $mem$$Address);
 7193   %}
 7194   ins_pipe(ialu_reg_mem);
 7195 %}
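
      // For illustration only (a sketch): on an int[] the four narrowing loads
      // above correspond to Java expressions such as
      //
      //   (byte) a[i]      // loadI2B
      //   a[i] & 0xFF      // loadI2UB
      //   (short) a[i]     // loadI2S
      //   a[i] & 0xFFFF    // loadI2US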
 7196 
 7197 // Load Integer into Long Register
 7198 instruct loadI2L(rRegL dst, memory mem)
 7199 %{
 7200   match(Set dst (ConvI2L (LoadI mem)));
 7201 
 7202   ins_cost(125);
 7203   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7204 
 7205   ins_encode %{
 7206     __ movslq($dst$$Register, $mem$$Address);
 7207   %}
 7208 
 7209   ins_pipe(ialu_reg_mem);
 7210 %}
 7211 
 7212 // Load Integer with mask 0xFF into Long Register
 7213 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7214   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7215 
 7216   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7217   ins_encode %{
 7218     __ movzbq($dst$$Register, $mem$$Address);
 7219   %}
 7220   ins_pipe(ialu_reg_mem);
 7221 %}
 7222 
 7223 // Load Integer with mask 0xFFFF into Long Register
 7224 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7225   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7226 
 7227   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7228   ins_encode %{
 7229     __ movzwq($dst$$Register, $mem$$Address);
 7230   %}
 7231   ins_pipe(ialu_reg_mem);
 7232 %}
 7233 
 7234 // Load Integer with a 31-bit mask into Long Register
 7235 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7236   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7237   effect(KILL cr);
 7238 
 7239   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7240             "andl    $dst, $mask" %}
 7241   ins_encode %{
 7242     Register Rdst = $dst$$Register;
 7243     __ movl(Rdst, $mem$$Address);
 7244     __ andl(Rdst, $mask$$constant);
 7245   %}
 7246   ins_pipe(ialu_reg_mem);
 7247 %}
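
      // For illustration only (a sketch): the masked int-to-long loads above
      // typically come from Java code like
      //
      //   long b(int[] a, int i) { return a[i] & 0xFF;       }  // movzbq
      //   long c(int[] a, int i) { return a[i] & 0xFFFF;     }  // movzwq
      //   long d(int[] a, int i) { return a[i] & 0x7FFFFFFF; }  // movl + andl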
 7248 
 7249 // Load Unsigned Integer into Long Register
 7250 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7251 %{
 7252   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7253 
 7254   ins_cost(125);
 7255   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7256 
 7257   ins_encode %{
 7258     __ movl($dst$$Register, $mem$$Address);
 7259   %}
 7260 
 7261   ins_pipe(ialu_reg_mem);
 7262 %}
 7263 
 7264 // Load Long
 7265 instruct loadL(rRegL dst, memory mem)
 7266 %{
 7267   match(Set dst (LoadL mem));
 7268 
 7269   ins_cost(125);
 7270   format %{ "movq    $dst, $mem\t# long" %}
 7271 
 7272   ins_encode %{
 7273     __ movq($dst$$Register, $mem$$Address);
 7274   %}
 7275 
 7276   ins_pipe(ialu_reg_mem); // XXX
 7277 %}
 7278 
 7279 // Load Range
 7280 instruct loadRange(rRegI dst, memory mem)
 7281 %{
 7282   match(Set dst (LoadRange mem));
 7283 
 7284   ins_cost(125); // XXX
 7285   format %{ "movl    $dst, $mem\t# range" %}
 7286   ins_encode %{
 7287     __ movl($dst$$Register, $mem$$Address);
 7288   %}
 7289   ins_pipe(ialu_reg_mem);
 7290 %}
 7291 
 7292 // Load Pointer
 7293 instruct loadP(rRegP dst, memory mem)
 7294 %{
 7295   match(Set dst (LoadP mem));
 7296   predicate(n->as_Load()->barrier_data() == 0);
 7297 
 7298   ins_cost(125); // XXX
 7299   format %{ "movq    $dst, $mem\t# ptr" %}
 7300   ins_encode %{
 7301     __ movq($dst$$Register, $mem$$Address);
 7302   %}
 7303   ins_pipe(ialu_reg_mem); // XXX
 7304 %}
 7305 
 7306 // Load Compressed Pointer
 7307 instruct loadN(rRegN dst, memory mem)
 7308 %{
 7309   predicate(n->as_Load()->barrier_data() == 0);
 7310   match(Set dst (LoadN mem));
 7311
 7312   ins_cost(125); // XXX
 7313   format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7314   ins_encode %{
 7315     __ movl($dst$$Register, $mem$$Address);
 7316   %}
 7317   ins_pipe(ialu_reg_mem); // XXX
 7318 %}
 7319 
 7320 
 7321 // Load Klass Pointer
 7322 instruct loadKlass(rRegP dst, memory mem)
 7323 %{
 7324   match(Set dst (LoadKlass mem));
 7325 
 7326   ins_cost(125); // XXX
 7327   format %{ "movq    $dst, $mem\t# klass ptr" %}
 7328   ins_encode %{
 7329     __ movq($dst$$Register, $mem$$Address);
 7330   %}
 7331   ins_pipe(ialu_reg_mem); // XXX
 7332 %}
 7333 
 7334 // Load narrow Klass Pointer
 7335 instruct loadNKlass(rRegN dst, memory mem)
 7336 %{
 7337   predicate(!UseCompactObjectHeaders);
 7338   match(Set dst (LoadNKlass mem));
 7339 
 7340   ins_cost(125); // XXX
 7341   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7342   ins_encode %{
 7343     __ movl($dst$$Register, $mem$$Address);
 7344   %}
 7345   ins_pipe(ialu_reg_mem); // XXX
 7346 %}
 7347 
 7348 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7349 %{
 7350   predicate(UseCompactObjectHeaders);
 7351   match(Set dst (LoadNKlass mem));
 7352   effect(KILL cr);
 7353   ins_cost(125);
 7354   format %{
 7355     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7356     "shrl    $dst, markWord::klass_shift_at_offset"
 7357   %}
 7358   ins_encode %{
 7359     if (UseAPX) {
 7360       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7361     } else {
 7363       __ movl($dst$$Register, $mem$$Address);
 7364       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7365     }
 7366   %}
 7367   ins_pipe(ialu_reg_mem);
 7368 %}
 7369 
 7370 // Load Float
 7371 instruct loadF(regF dst, memory mem)
 7372 %{
 7373   match(Set dst (LoadF mem));
 7374 
 7375   ins_cost(145); // XXX
 7376   format %{ "movss   $dst, $mem\t# float" %}
 7377   ins_encode %{
 7378     __ movflt($dst$$XMMRegister, $mem$$Address);
 7379   %}
 7380   ins_pipe(pipe_slow); // XXX
 7381 %}
 7382 
 7383 // Load Double
 7384 instruct loadD_partial(regD dst, memory mem)
 7385 %{
 7386   predicate(!UseXmmLoadAndClearUpper);
 7387   match(Set dst (LoadD mem));
 7388 
 7389   ins_cost(145); // XXX
 7390   format %{ "movlpd  $dst, $mem\t# double" %}
 7391   ins_encode %{
 7392     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7393   %}
 7394   ins_pipe(pipe_slow); // XXX
 7395 %}
 7396 
 7397 instruct loadD(regD dst, memory mem)
 7398 %{
 7399   predicate(UseXmmLoadAndClearUpper);
 7400   match(Set dst (LoadD mem));
 7401 
 7402   ins_cost(145); // XXX
 7403   format %{ "movsd   $dst, $mem\t# double" %}
 7404   ins_encode %{
 7405     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7406   %}
 7407   ins_pipe(pipe_slow); // XXX
 7408 %}
 7409 
 7410 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7411 %{
 7412   match(Set dst con);
 7413 
 7414   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7415 
 7416   ins_encode %{
 7417     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7418   %}
 7419 
 7420   ins_pipe(ialu_reg_fat);
 7421 %}
 7422 
 7423 // min = java.lang.Math.min(float a, float b)
 7424 // max = java.lang.Math.max(float a, float b)
 7425 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
 7426 %{
 7427   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7428   match(Set dst (MaxF a b));
 7429   match(Set dst (MinF a b));
 7430 
 7431   format %{ "minmaxF $dst, $a, $b" %}
 7432   ins_encode %{
 7433     int opcode = this->ideal_Opcode();
 7434     __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7435   %}
 7436   ins_pipe( pipe_slow );
 7437 %}
 7438 
 7439 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
 7440 %{
 7441   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7442   match(Set dst (MaxF a b));
 7443   match(Set dst (MinF a b));
 7444   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7445 
 7446   format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7447   ins_encode %{
 7448     int opcode = this->ideal_Opcode();
 7449     bool min = (opcode == Op_MinF);
 7450     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7451                     min, fp_prec_flt /*pt*/);
 7452   %}
 7453   ins_pipe( pipe_slow );
 7454 %}
 7455 
 7456 // min = java.lang.Math.min(float a, float b)
 7457 // max = java.lang.Math.max(float a, float b)
 7458 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
 7459 %{
 7460   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7461   match(Set dst (MaxF a b));
 7462   match(Set dst (MinF a b));
 7463   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7464 
 7465   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7466   ins_encode %{
 7467     int opcode = this->ideal_Opcode();
 7468     int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
 7469     __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7470                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7471   %}
 7472   ins_pipe( pipe_slow );
 7473 %}
 7474 
 7475 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
 7476 %{
 7477   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7478   match(Set dst (MaxF a b));
 7479   match(Set dst (MinF a b));
 7480   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7481 
 7482   format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7483   ins_encode %{
 7484     int opcode = this->ideal_Opcode();
 7485     bool min = (opcode == Op_MinF);
 7486     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7487                     min, fp_prec_flt /*pt*/);
 7488   %}
 7489   ins_pipe( pipe_slow );
 7490 %}
 7491 
 7492 // min = java.lang.Math.min(double a, double b)
 7493 // max = java.lang.Math.max(double a, double b)
 7494 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
 7495 %{
 7496   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7497   match(Set dst (MaxD a b));
 7498   match(Set dst (MinD a b));
 7499 
 7500   format %{ "minmaxD $dst, $a, $b" %}
 7501   ins_encode %{
 7502     int opcode = this->ideal_Opcode();
 7503     __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7504   %}
 7505   ins_pipe( pipe_slow );
 7506 %}
 7507 
 7508 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
 7509 %{
 7510   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7511   match(Set dst (MaxD a b));
 7512   match(Set dst (MinD a b));
 7513   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7514 
 7515   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7516   ins_encode %{
 7517     int opcode = this->ideal_Opcode();
 7518     bool min = (opcode == Op_MinD);
 7519     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7520                     min, fp_prec_dbl /*pt*/);
 7521   %}
 7522   ins_pipe( pipe_slow );
 7523 %}
 7524 
 7525 // min = java.lang.Math.min(double a, double b)
 7526 // max = java.lang.Math.max(double a, double b)
 7527 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
 7528 %{
 7529   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7530   match(Set dst (MaxD a b));
 7531   match(Set dst (MinD a b));
 7532   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7533 
 7534   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7535   ins_encode %{
 7536     int opcode = this->ideal_Opcode();
 7537     int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
 7538     __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7539                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7540   %}
 7541   ins_pipe( pipe_slow );
 7542 %}
 7543 
 7544 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
 7545 %{
 7546   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7547   match(Set dst (MaxD a b));
 7548   match(Set dst (MinD a b));
 7549   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7550 
 7551   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7552   ins_encode %{
 7553     int opcode = this->ideal_Opcode();
 7554     bool min = (opcode == Op_MinD);
 7555     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7556                     min, fp_prec_dbl /*pt*/);
 7557   %}
 7558   ins_pipe( pipe_slow );
 7559 %}
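
      // For illustration only (a sketch): the reduction variants above are
      // selected when the min/max result feeds a loop-carried accumulator, so
      // VLoopReductions::is_reduction(n) holds, e.g.
      //
      //   float minOf(float[] a) {
      //     float m = Float.POSITIVE_INFINITY;
      //     for (int i = 0; i < a.length; i++) {
      //       m = Math.min(m, a[i]);   // reduction use of MinF
      //     }
      //     return m;
      //   }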
 7560 
 7561 // Load Effective Address
 7562 instruct leaP8(rRegP dst, indOffset8 mem)
 7563 %{
 7564   match(Set dst mem);
 7565 
 7566   ins_cost(110); // XXX
 7567   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7568   ins_encode %{
 7569     __ leaq($dst$$Register, $mem$$Address);
 7570   %}
 7571   ins_pipe(ialu_reg_reg_fat);
 7572 %}
 7573 
 7574 instruct leaP32(rRegP dst, indOffset32 mem)
 7575 %{
 7576   match(Set dst mem);
 7577 
 7578   ins_cost(110);
 7579   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7580   ins_encode %{
 7581     __ leaq($dst$$Register, $mem$$Address);
 7582   %}
 7583   ins_pipe(ialu_reg_reg_fat);
 7584 %}
 7585 
 7586 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7587 %{
 7588   match(Set dst mem);
 7589 
 7590   ins_cost(110);
 7591   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7592   ins_encode %{
 7593     __ leaq($dst$$Register, $mem$$Address);
 7594   %}
 7595   ins_pipe(ialu_reg_reg_fat);
 7596 %}
 7597 
 7598 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7599 %{
 7600   match(Set dst mem);
 7601 
 7602   ins_cost(110);
 7603   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7604   ins_encode %{
 7605     __ leaq($dst$$Register, $mem$$Address);
 7606   %}
 7607   ins_pipe(ialu_reg_reg_fat);
 7608 %}
 7609 
 7610 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7611 %{
 7612   match(Set dst mem);
 7613 
 7614   ins_cost(110);
 7615   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7616   ins_encode %{
 7617     __ leaq($dst$$Register, $mem$$Address);
 7618   %}
 7619   ins_pipe(ialu_reg_reg_fat);
 7620 %}
 7621 
 7622 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7623 %{
 7624   match(Set dst mem);
 7625 
 7626   ins_cost(110);
 7627   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7628   ins_encode %{
 7629     __ leaq($dst$$Register, $mem$$Address);
 7630   %}
 7631   ins_pipe(ialu_reg_reg_fat);
 7632 %}
 7633 
 7634 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7635 %{
 7636   match(Set dst mem);
 7637 
 7638   ins_cost(110);
 7639   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7640   ins_encode %{
 7641     __ leaq($dst$$Register, $mem$$Address);
 7642   %}
 7643   ins_pipe(ialu_reg_reg_fat);
 7644 %}
 7645 
 7646 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7647 %{
 7648   match(Set dst mem);
 7649 
 7650   ins_cost(110);
 7651   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7652   ins_encode %{
 7653     __ leaq($dst$$Register, $mem$$Address);
 7654   %}
 7655   ins_pipe(ialu_reg_reg_fat);
 7656 %}
 7657 
 7658 // Load Effective Address which uses a narrow (32-bit) oop
 7659 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7660 %{
 7661   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7662   match(Set dst mem);
 7663 
 7664   ins_cost(110);
 7665   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7666   ins_encode %{
 7667     __ leaq($dst$$Register, $mem$$Address);
 7668   %}
 7669   ins_pipe(ialu_reg_reg_fat);
 7670 %}
 7671 
 7672 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7673 %{
 7674   predicate(CompressedOops::shift() == 0);
 7675   match(Set dst mem);
 7676 
 7677   ins_cost(110); // XXX
 7678   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7679   ins_encode %{
 7680     __ leaq($dst$$Register, $mem$$Address);
 7681   %}
 7682   ins_pipe(ialu_reg_reg_fat);
 7683 %}
 7684 
 7685 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7686 %{
 7687   predicate(CompressedOops::shift() == 0);
 7688   match(Set dst mem);
 7689 
 7690   ins_cost(110);
 7691   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7692   ins_encode %{
 7693     __ leaq($dst$$Register, $mem$$Address);
 7694   %}
 7695   ins_pipe(ialu_reg_reg_fat);
 7696 %}
 7697 
 7698 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7699 %{
 7700   predicate(CompressedOops::shift() == 0);
 7701   match(Set dst mem);
 7702 
 7703   ins_cost(110);
 7704   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7705   ins_encode %{
 7706     __ leaq($dst$$Register, $mem$$Address);
 7707   %}
 7708   ins_pipe(ialu_reg_reg_fat);
 7709 %}
 7710 
 7711 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7712 %{
 7713   predicate(CompressedOops::shift() == 0);
 7714   match(Set dst mem);
 7715 
 7716   ins_cost(110);
 7717   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7718   ins_encode %{
 7719     __ leaq($dst$$Register, $mem$$Address);
 7720   %}
 7721   ins_pipe(ialu_reg_reg_fat);
 7722 %}
 7723 
 7724 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7725 %{
 7726   predicate(CompressedOops::shift() == 0);
 7727   match(Set dst mem);
 7728 
 7729   ins_cost(110);
 7730   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7731   ins_encode %{
 7732     __ leaq($dst$$Register, $mem$$Address);
 7733   %}
 7734   ins_pipe(ialu_reg_reg_fat);
 7735 %}
 7736 
 7737 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7738 %{
 7739   predicate(CompressedOops::shift() == 0);
 7740   match(Set dst mem);
 7741 
 7742   ins_cost(110);
 7743   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7744   ins_encode %{
 7745     __ leaq($dst$$Register, $mem$$Address);
 7746   %}
 7747   ins_pipe(ialu_reg_reg_fat);
 7748 %}
 7749 
 7750 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7751 %{
 7752   predicate(CompressedOops::shift() == 0);
 7753   match(Set dst mem);
 7754 
 7755   ins_cost(110);
 7756   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7757   ins_encode %{
 7758     __ leaq($dst$$Register, $mem$$Address);
 7759   %}
 7760   ins_pipe(ialu_reg_reg_fat);
 7761 %}
 7762 
 7763 instruct loadConI(rRegI dst, immI src)
 7764 %{
 7765   match(Set dst src);
 7766 
 7767   format %{ "movl    $dst, $src\t# int" %}
 7768   ins_encode %{
 7769     __ movl($dst$$Register, $src$$constant);
 7770   %}
 7771   ins_pipe(ialu_reg_fat); // XXX
 7772 %}
 7773 
 7774 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7775 %{
 7776   match(Set dst src);
 7777   effect(KILL cr);
 7778 
 7779   ins_cost(50);
 7780   format %{ "xorl    $dst, $dst\t# int" %}
 7781   ins_encode %{
 7782     __ xorl($dst$$Register, $dst$$Register);
 7783   %}
 7784   ins_pipe(ialu_reg);
 7785 %}
 7786 
 7787 instruct loadConL(rRegL dst, immL src)
 7788 %{
 7789   match(Set dst src);
 7790 
 7791   ins_cost(150);
 7792   format %{ "movq    $dst, $src\t# long" %}
 7793   ins_encode %{
 7794     __ mov64($dst$$Register, $src$$constant);
 7795   %}
 7796   ins_pipe(ialu_reg);
 7797 %}
 7798 
 7799 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7800 %{
 7801   match(Set dst src);
 7802   effect(KILL cr);
 7803 
 7804   ins_cost(50);
 7805   format %{ "xorl    $dst, $dst\t# long" %}
 7806   ins_encode %{
 7807     __ xorl($dst$$Register, $dst$$Register);
 7808   %}
 7809   ins_pipe(ialu_reg); // XXX
 7810 %}
 7811 
 7812 instruct loadConUL32(rRegL dst, immUL32 src)
 7813 %{
 7814   match(Set dst src);
 7815 
 7816   ins_cost(60);
 7817   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7818   ins_encode %{
 7819     __ movl($dst$$Register, $src$$constant);
 7820   %}
 7821   ins_pipe(ialu_reg);
 7822 %}
 7823 
 7824 instruct loadConL32(rRegL dst, immL32 src)
 7825 %{
 7826   match(Set dst src);
 7827 
 7828   ins_cost(70);
 7829   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7830   ins_encode %{
 7831     __ movq($dst$$Register, $src$$constant);
 7832   %}
 7833   ins_pipe(ialu_reg);
 7834 %}
 7835 
 7836 instruct loadConP(rRegP dst, immP con) %{
 7837   match(Set dst con);
 7838 
 7839   format %{ "movq    $dst, $con\t# ptr" %}
 7840   ins_encode %{
 7841     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7842   %}
 7843   ins_pipe(ialu_reg_fat); // XXX
 7844 %}
 7845 
 7846 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7847 %{
 7848   match(Set dst src);
 7849   effect(KILL cr);
 7850 
 7851   ins_cost(50);
 7852   format %{ "xorl    $dst, $dst\t# ptr" %}
 7853   ins_encode %{
 7854     __ xorl($dst$$Register, $dst$$Register);
 7855   %}
 7856   ins_pipe(ialu_reg);
 7857 %}
 7858 
 7859 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7860 %{
 7861   match(Set dst src);
 7862   effect(KILL cr);
 7863 
 7864   ins_cost(60);
 7865   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7866   ins_encode %{
 7867     __ movl($dst$$Register, $src$$constant);
 7868   %}
 7869   ins_pipe(ialu_reg);
 7870 %}
 7871 
 7872 instruct loadConF(regF dst, immF con) %{
 7873   match(Set dst con);
 7874   ins_cost(125);
 7875   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7876   ins_encode %{
 7877     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7878   %}
 7879   ins_pipe(pipe_slow);
 7880 %}
 7881 
 7882 instruct loadConH(regF dst, immH con) %{
 7883   match(Set dst con);
 7884   ins_cost(125);
 7885   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7886   ins_encode %{
 7887     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7888   %}
 7889   ins_pipe(pipe_slow);
 7890 %}
 7891 
 7892 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7893   match(Set dst src);
 7894   effect(KILL cr);
 7895   format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7896   ins_encode %{
 7897     __ xorq($dst$$Register, $dst$$Register);
 7898   %}
 7899   ins_pipe(ialu_reg);
 7900 %}
 7901 
 7902 instruct loadConN(rRegN dst, immN src) %{
 7903   match(Set dst src);
 7904 
 7905   ins_cost(125);
 7906   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7907   ins_encode %{
 7908     address con = (address)$src$$constant;
 7909     if (con == nullptr) {
 7910       ShouldNotReachHere();
 7911     } else {
 7912       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7913     }
 7914   %}
 7915   ins_pipe(ialu_reg_fat); // XXX
 7916 %}
 7917 
 7918 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7919   match(Set dst src);
 7920 
 7921   ins_cost(125);
 7922   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7923   ins_encode %{
 7924     address con = (address)$src$$constant;
 7925     if (con == nullptr) {
 7926       ShouldNotReachHere();
 7927     } else {
 7928       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7929     }
 7930   %}
 7931   ins_pipe(ialu_reg_fat); // XXX
 7932 %}
 7933 
 7934 instruct loadConF0(regF dst, immF0 src)
 7935 %{
 7936   match(Set dst src);
 7937   ins_cost(100);
 7938 
 7939   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7940   ins_encode %{
 7941     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7942   %}
 7943   ins_pipe(pipe_slow);
 7944 %}
 7945 
 7946 // Use the same format since predicate() cannot be used here.
 7947 instruct loadConD(regD dst, immD con) %{
 7948   match(Set dst con);
 7949   ins_cost(125);
 7950   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7951   ins_encode %{
 7952     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7953   %}
 7954   ins_pipe(pipe_slow);
 7955 %}
 7956 
 7957 instruct loadConD0(regD dst, immD0 src)
 7958 %{
 7959   match(Set dst src);
 7960   ins_cost(100);
 7961 
 7962   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7963   ins_encode %{
 7964     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7965   %}
 7966   ins_pipe(pipe_slow);
 7967 %}
 7968 
 7969 instruct loadSSI(rRegI dst, stackSlotI src)
 7970 %{
 7971   match(Set dst src);
 7972 
 7973   ins_cost(125);
 7974   format %{ "movl    $dst, $src\t# int stk" %}
 7975   ins_encode %{
 7976     __ movl($dst$$Register, $src$$Address);
 7977   %}
 7978   ins_pipe(ialu_reg_mem);
 7979 %}
 7980 
 7981 instruct loadSSL(rRegL dst, stackSlotL src)
 7982 %{
 7983   match(Set dst src);
 7984 
 7985   ins_cost(125);
 7986   format %{ "movq    $dst, $src\t# long stk" %}
 7987   ins_encode %{
 7988     __ movq($dst$$Register, $src$$Address);
 7989   %}
 7990   ins_pipe(ialu_reg_mem);
 7991 %}
 7992 
 7993 instruct loadSSP(rRegP dst, stackSlotP src)
 7994 %{
 7995   match(Set dst src);
 7996 
 7997   ins_cost(125);
 7998   format %{ "movq    $dst, $src\t# ptr stk" %}
 7999   ins_encode %{
 8000     __ movq($dst$$Register, $src$$Address);
 8001   %}
 8002   ins_pipe(ialu_reg_mem);
 8003 %}
 8004 
 8005 instruct loadSSF(regF dst, stackSlotF src)
 8006 %{
 8007   match(Set dst src);
 8008 
 8009   ins_cost(125);
 8010   format %{ "movss   $dst, $src\t# float stk" %}
 8011   ins_encode %{
 8012     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 8013   %}
 8014   ins_pipe(pipe_slow); // XXX
 8015 %}
 8016 
 8017 // Use the same format since predicate() cannot be used here.
 8018 instruct loadSSD(regD dst, stackSlotD src)
 8019 %{
 8020   match(Set dst src);
 8021 
 8022   ins_cost(125);
 8023   format %{ "movsd   $dst, $src\t# double stk" %}
 8024   ins_encode  %{
 8025     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 8026   %}
 8027   ins_pipe(pipe_slow); // XXX
 8028 %}
 8029 
 8030 // Prefetch instructions for allocation.
 8031 // Must be safe to execute with invalid address (cannot fault).
 8032 
 8033 instruct prefetchAlloc( memory mem ) %{
 8034   predicate(AllocatePrefetchInstr==3);
 8035   match(PrefetchAllocation mem);
 8036   ins_cost(125);
 8037 
 8038   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 8039   ins_encode %{
 8040     __ prefetchw($mem$$Address);
 8041   %}
 8042   ins_pipe(ialu_mem);
 8043 %}
 8044 
 8045 instruct prefetchAllocNTA( memory mem ) %{
 8046   predicate(AllocatePrefetchInstr==0);
 8047   match(PrefetchAllocation mem);
 8048   ins_cost(125);
 8049 
 8050   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 8051   ins_encode %{
 8052     __ prefetchnta($mem$$Address);
 8053   %}
 8054   ins_pipe(ialu_mem);
 8055 %}
 8056 
 8057 instruct prefetchAllocT0( memory mem ) %{
 8058   predicate(AllocatePrefetchInstr==1);
 8059   match(PrefetchAllocation mem);
 8060   ins_cost(125);
 8061 
 8062   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8063   ins_encode %{
 8064     __ prefetcht0($mem$$Address);
 8065   %}
 8066   ins_pipe(ialu_mem);
 8067 %}
 8068 
 8069 instruct prefetchAllocT2( memory mem ) %{
 8070   predicate(AllocatePrefetchInstr==2);
 8071   match(PrefetchAllocation mem);
 8072   ins_cost(125);
 8073 
 8074   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8075   ins_encode %{
 8076     __ prefetcht2($mem$$Address);
 8077   %}
 8078   ins_pipe(ialu_mem);
 8079 %}
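
      // For illustration only: per the predicates above, -XX:AllocatePrefetchInstr
      // selects the flavor (0 -> prefetchnta, 1 -> prefetcht0, 2 -> prefetcht2,
      // 3 -> prefetchw). A usage sketch, assuming the CPU supports PREFETCHW:
      //
      //   java -XX:AllocatePrefetchInstr=3 ...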
 8080 
 8081 //----------Store Instructions-------------------------------------------------
 8082 
 8083 // Store Byte
 8084 instruct storeB(memory mem, rRegI src)
 8085 %{
 8086   match(Set mem (StoreB mem src));
 8087 
 8088   ins_cost(125); // XXX
 8089   format %{ "movb    $mem, $src\t# byte" %}
 8090   ins_encode %{
 8091     __ movb($mem$$Address, $src$$Register);
 8092   %}
 8093   ins_pipe(ialu_mem_reg);
 8094 %}
 8095 
 8096 // Store Char/Short
 8097 instruct storeC(memory mem, rRegI src)
 8098 %{
 8099   match(Set mem (StoreC mem src));
 8100 
 8101   ins_cost(125); // XXX
 8102   format %{ "movw    $mem, $src\t# char/short" %}
 8103   ins_encode %{
 8104     __ movw($mem$$Address, $src$$Register);
 8105   %}
 8106   ins_pipe(ialu_mem_reg);
 8107 %}
 8108 
 8109 // Store Integer
 8110 instruct storeI(memory mem, rRegI src)
 8111 %{
 8112   match(Set mem (StoreI mem src));
 8113 
 8114   ins_cost(125); // XXX
 8115   format %{ "movl    $mem, $src\t# int" %}
 8116   ins_encode %{
 8117     __ movl($mem$$Address, $src$$Register);
 8118   %}
 8119   ins_pipe(ialu_mem_reg);
 8120 %}
 8121 
 8122 // Store Long
 8123 instruct storeL(memory mem, rRegL src)
 8124 %{
 8125   match(Set mem (StoreL mem src));
 8126 
 8127   ins_cost(125); // XXX
 8128   format %{ "movq    $mem, $src\t# long" %}
 8129   ins_encode %{
 8130     __ movq($mem$$Address, $src$$Register);
 8131   %}
 8132   ins_pipe(ialu_mem_reg); // XXX
 8133 %}
 8134 
 8135 // Store Pointer
 8136 instruct storeP(memory mem, any_RegP src)
 8137 %{
 8138   predicate(n->as_Store()->barrier_data() == 0);
 8139   match(Set mem (StoreP mem src));
 8140 
 8141   ins_cost(125); // XXX
 8142   format %{ "movq    $mem, $src\t# ptr" %}
 8143   ins_encode %{
 8144     __ movq($mem$$Address, $src$$Register);
 8145   %}
 8146   ins_pipe(ialu_mem_reg);
 8147 %}
 8148 
 8149 instruct storeImmP0(memory mem, immP0 zero)
 8150 %{
 8151   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8152   match(Set mem (StoreP mem zero));
 8153 
 8154   ins_cost(125); // XXX
 8155   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8156   ins_encode %{
 8157     __ movq($mem$$Address, r12);
 8158   %}
 8159   ins_pipe(ialu_mem_reg);
 8160 %}
 8161 
 8162 // Store Null Pointer, mark word, or other simple pointer constant.
 8163 instruct storeImmP(memory mem, immP31 src)
 8164 %{
 8165   predicate(n->as_Store()->barrier_data() == 0);
 8166   match(Set mem (StoreP mem src));
 8167 
 8168   ins_cost(150); // XXX
 8169   format %{ "movq    $mem, $src\t# ptr" %}
 8170   ins_encode %{
 8171     __ movq($mem$$Address, $src$$constant);
 8172   %}
 8173   ins_pipe(ialu_mem_imm);
 8174 %}
 8175 
 8176 // Store Compressed Pointer
 8177 instruct storeN(memory mem, rRegN src)
 8178 %{
 8179   predicate(n->as_Store()->barrier_data() == 0);
 8180   match(Set mem (StoreN mem src));
 8181 
 8182   ins_cost(125); // XXX
 8183   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8184   ins_encode %{
 8185     __ movl($mem$$Address, $src$$Register);
 8186   %}
 8187   ins_pipe(ialu_mem_reg);
 8188 %}
 8189 
 8190 instruct storeNKlass(memory mem, rRegN src)
 8191 %{
 8192   match(Set mem (StoreNKlass mem src));
 8193 
 8194   ins_cost(125); // XXX
 8195   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8196   ins_encode %{
 8197     __ movl($mem$$Address, $src$$Register);
 8198   %}
 8199   ins_pipe(ialu_mem_reg);
 8200 %}
 8201 
 8202 instruct storeImmN0(memory mem, immN0 zero)
 8203 %{
 8204   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8205   match(Set mem (StoreN mem zero));
 8206 
 8207   ins_cost(125); // XXX
 8208   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8209   ins_encode %{
 8210     __ movl($mem$$Address, r12);
 8211   %}
 8212   ins_pipe(ialu_mem_reg);
 8213 %}
 8214 
 8215 instruct storeImmN(memory mem, immN src)
 8216 %{
 8217   predicate(n->as_Store()->barrier_data() == 0);
 8218   match(Set mem (StoreN mem src));
 8219 
 8220   ins_cost(150); // XXX
 8221   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8222   ins_encode %{
 8223     address con = (address)$src$$constant;
 8224     if (con == nullptr) {
 8225       __ movl($mem$$Address, 0);
 8226     } else {
 8227       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8228     }
 8229   %}
 8230   ins_pipe(ialu_mem_imm);
 8231 %}
 8232 
 8233 instruct storeImmNKlass(memory mem, immNKlass src)
 8234 %{
 8235   match(Set mem (StoreNKlass mem src));
 8236 
 8237   ins_cost(150); // XXX
 8238   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8239   ins_encode %{
 8240     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8241   %}
 8242   ins_pipe(ialu_mem_imm);
 8243 %}
 8244 
 8245 // Store Integer Immediate
 8246 instruct storeImmI0(memory mem, immI_0 zero)
 8247 %{
 8248   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8249   match(Set mem (StoreI mem zero));
 8250 
 8251   ins_cost(125); // XXX
 8252   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8253   ins_encode %{
 8254     __ movl($mem$$Address, r12);
 8255   %}
 8256   ins_pipe(ialu_mem_reg);
 8257 %}
 8258 
 8259 instruct storeImmI(memory mem, immI src)
 8260 %{
 8261   match(Set mem (StoreI mem src));
 8262 
 8263   ins_cost(150);
 8264   format %{ "movl    $mem, $src\t# int" %}
 8265   ins_encode %{
 8266     __ movl($mem$$Address, $src$$constant);
 8267   %}
 8268   ins_pipe(ialu_mem_imm);
 8269 %}
 8270 
 8271 // Store Long Immediate
 8272 instruct storeImmL0(memory mem, immL0 zero)
 8273 %{
 8274   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8275   match(Set mem (StoreL mem zero));
 8276 
 8277   ins_cost(125); // XXX
 8278   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8279   ins_encode %{
 8280     __ movq($mem$$Address, r12);
 8281   %}
 8282   ins_pipe(ialu_mem_reg);
 8283 %}
 8284 
 8285 instruct storeImmL(memory mem, immL32 src)
 8286 %{
 8287   match(Set mem (StoreL mem src));
 8288 
 8289   ins_cost(150);
 8290   format %{ "movq    $mem, $src\t# long" %}
 8291   ins_encode %{
 8292     __ movq($mem$$Address, $src$$constant);
 8293   %}
 8294   ins_pipe(ialu_mem_imm);
 8295 %}
 8296 
 8297 // Store Short/Char Immediate
 8298 instruct storeImmC0(memory mem, immI_0 zero)
 8299 %{
 8300   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8301   match(Set mem (StoreC mem zero));
 8302 
 8303   ins_cost(125); // XXX
 8304   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8305   ins_encode %{
 8306     __ movw($mem$$Address, r12);
 8307   %}
 8308   ins_pipe(ialu_mem_reg);
 8309 %}
 8310 
 8311 instruct storeImmI16(memory mem, immI16 src)
 8312 %{
 8313   predicate(UseStoreImmI16);
 8314   match(Set mem (StoreC mem src));
 8315 
 8316   ins_cost(150);
 8317   format %{ "movw    $mem, $src\t# short/char" %}
 8318   ins_encode %{
 8319     __ movw($mem$$Address, $src$$constant);
 8320   %}
 8321   ins_pipe(ialu_mem_imm);
 8322 %}
 8323 
 8324 // Store Byte Immediate
 8325 instruct storeImmB0(memory mem, immI_0 zero)
 8326 %{
 8327   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8328   match(Set mem (StoreB mem zero));
 8329 
 8330   ins_cost(125); // XXX
 8331   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8332   ins_encode %{
 8333     __ movb($mem$$Address, r12);
 8334   %}
 8335   ins_pipe(ialu_mem_reg);
 8336 %}
 8337 
 8338 instruct storeImmB(memory mem, immI8 src)
 8339 %{
 8340   match(Set mem (StoreB mem src));
 8341 
 8342   ins_cost(150); // XXX
 8343   format %{ "movb    $mem, $src\t# byte" %}
 8344   ins_encode %{
 8345     __ movb($mem$$Address, $src$$constant);
 8346   %}
 8347   ins_pipe(ialu_mem_imm);
 8348 %}
 8349 
 8350 // Store Float
 8351 instruct storeF(memory mem, regF src)
 8352 %{
 8353   match(Set mem (StoreF mem src));
 8354 
 8355   ins_cost(95); // XXX
 8356   format %{ "movss   $mem, $src\t# float" %}
 8357   ins_encode %{
 8358     __ movflt($mem$$Address, $src$$XMMRegister);
 8359   %}
 8360   ins_pipe(pipe_slow); // XXX
 8361 %}
 8362 
 8363 // Store immediate Float value (faster than storing from an XMM register)
 8364 instruct storeF0(memory mem, immF0 zero)
 8365 %{
 8366   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8367   match(Set mem (StoreF mem zero));
 8368 
 8369   ins_cost(25); // XXX
 8370   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8371   ins_encode %{
 8372     __ movl($mem$$Address, r12);
 8373   %}
 8374   ins_pipe(ialu_mem_reg);
 8375 %}
 8376 
 8377 instruct storeF_imm(memory mem, immF src)
 8378 %{
 8379   match(Set mem (StoreF mem src));
 8380 
 8381   ins_cost(50);
 8382   format %{ "movl    $mem, $src\t# float" %}
 8383   ins_encode %{
 8384     __ movl($mem$$Address, jint_cast($src$$constant));
 8385   %}
 8386   ins_pipe(ialu_mem_imm);
 8387 %}
 8388 
 8389 // Store Double
 8390 instruct storeD(memory mem, regD src)
 8391 %{
 8392   match(Set mem (StoreD mem src));
 8393 
 8394   ins_cost(95); // XXX
 8395   format %{ "movsd   $mem, $src\t# double" %}
 8396   ins_encode %{
 8397     __ movdbl($mem$$Address, $src$$XMMRegister);
 8398   %}
 8399   ins_pipe(pipe_slow); // XXX
 8400 %}
 8401 
 8402 // Store immediate double 0.0 (faster than storing from an XMM register)
 8403 instruct storeD0_imm(memory mem, immD0 src)
 8404 %{
 8405   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8406   match(Set mem (StoreD mem src));
 8407 
 8408   ins_cost(50);
 8409   format %{ "movq    $mem, $src\t# double 0." %}
 8410   ins_encode %{
 8411     __ movq($mem$$Address, $src$$constant);
 8412   %}
 8413   ins_pipe(ialu_mem_imm);
 8414 %}
 8415 
 8416 instruct storeD0(memory mem, immD0 zero)
 8417 %{
 8418   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8419   match(Set mem (StoreD mem zero));
 8420 
 8421   ins_cost(25); // XXX
 8422   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8423   ins_encode %{
 8424     __ movq($mem$$Address, r12);
 8425   %}
 8426   ins_pipe(ialu_mem_reg);
 8427 %}
 8428 
 8429 instruct storeSSI(stackSlotI dst, rRegI src)
 8430 %{
 8431   match(Set dst src);
 8432 
 8433   ins_cost(100);
 8434   format %{ "movl    $dst, $src\t# int stk" %}
 8435   ins_encode %{
 8436     __ movl($dst$$Address, $src$$Register);
 8437   %}
 8438   ins_pipe( ialu_mem_reg );
 8439 %}
 8440 
 8441 instruct storeSSL(stackSlotL dst, rRegL src)
 8442 %{
 8443   match(Set dst src);
 8444 
 8445   ins_cost(100);
 8446   format %{ "movq    $dst, $src\t# long stk" %}
 8447   ins_encode %{
 8448     __ movq($dst$$Address, $src$$Register);
 8449   %}
 8450   ins_pipe(ialu_mem_reg);
 8451 %}
 8452 
 8453 instruct storeSSP(stackSlotP dst, rRegP src)
 8454 %{
 8455   match(Set dst src);
 8456 
 8457   ins_cost(100);
 8458   format %{ "movq    $dst, $src\t# ptr stk" %}
 8459   ins_encode %{
 8460     __ movq($dst$$Address, $src$$Register);
 8461   %}
 8462   ins_pipe(ialu_mem_reg);
 8463 %}
 8464 
 8465 instruct storeSSF(stackSlotF dst, regF src)
 8466 %{
 8467   match(Set dst src);
 8468 
 8469   ins_cost(95); // XXX
 8470   format %{ "movss   $dst, $src\t# float stk" %}
 8471   ins_encode %{
 8472     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8473   %}
 8474   ins_pipe(pipe_slow); // XXX
 8475 %}
 8476 
 8477 instruct storeSSD(stackSlotD dst, regD src)
 8478 %{
 8479   match(Set dst src);
 8480 
 8481   ins_cost(95); // XXX
 8482   format %{ "movsd   $dst, $src\t# double stk" %}
 8483   ins_encode %{
 8484     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8485   %}
 8486   ins_pipe(pipe_slow); // XXX
 8487 %}
 8488 
 8489 instruct cacheWB(indirect addr)
 8490 %{
 8491   predicate(VM_Version::supports_data_cache_line_flush());
 8492   match(CacheWB addr);
 8493 
 8494   ins_cost(100);
 8495   format %{ "cache wb $addr" %}
 8496   ins_encode %{
 8497     assert($addr->index_position() < 0, "should be");
 8498     assert($addr$$disp == 0, "should be");
 8499     __ cache_wb(Address($addr$$base$$Register, 0));
 8500   %}
 8501   ins_pipe(pipe_slow); // XXX
 8502 %}
 8503 
 8504 instruct cacheWBPreSync()
 8505 %{
 8506   predicate(VM_Version::supports_data_cache_line_flush());
 8507   match(CacheWBPreSync);
 8508 
 8509   ins_cost(100);
 8510   format %{ "cache wb presync" %}
 8511   ins_encode %{
 8512     __ cache_wbsync(true);
 8513   %}
 8514   ins_pipe(pipe_slow); // XXX
 8515 %}
 8516 
 8517 instruct cacheWBPostSync()
 8518 %{
 8519   predicate(VM_Version::supports_data_cache_line_flush());
 8520   match(CacheWBPostSync);
 8521 
 8522   ins_cost(100);
 8523   format %{ "cache wb postsync" %}
 8524   ins_encode %{
 8525     __ cache_wbsync(false);
 8526   %}
 8527   ins_pipe(pipe_slow); // XXX
 8528 %}
 8529 
 8530 //----------BSWAP Instructions-------------------------------------------------
 8531 instruct bytes_reverse_int(rRegI dst) %{
 8532   match(Set dst (ReverseBytesI dst));
 8533 
 8534   format %{ "bswapl  $dst" %}
 8535   ins_encode %{
 8536     __ bswapl($dst$$Register);
 8537   %}
 8538   ins_pipe( ialu_reg );
 8539 %}
 8540 
 8541 instruct bytes_reverse_long(rRegL dst) %{
 8542   match(Set dst (ReverseBytesL dst));
 8543 
 8544   format %{ "bswapq  $dst" %}
 8545   ins_encode %{
 8546     __ bswapq($dst$$Register);
 8547   %}
 8548   ins_pipe( ialu_reg);
 8549 %}
 8550 
 8551 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8552   match(Set dst (ReverseBytesUS dst));
 8553   effect(KILL cr);
 8554 
 8555   format %{ "bswapl  $dst\n\t"
 8556             "shrl    $dst, 16" %}
 8557   ins_encode %{
 8558     __ bswapl($dst$$Register);
 8559     __ shrl($dst$$Register, 16);
 8560   %}
 8561   ins_pipe( ialu_reg );
 8562 %}
 8563 
 8564 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8565   match(Set dst (ReverseBytesS dst));
 8566   effect(KILL cr);
 8567 
 8568   format %{ "bswapl  $dst\n\t"
 8569             "sarl    $dst, 16" %}
 8570   ins_encode %{
 8571     __ bswapl($dst$$Register);
 8572     __ sarl($dst$$Register, 16);
 8573   %}
 8574   ins_pipe( ialu_reg );
 8575 %}
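
      // For illustration only (a sketch): the ReverseBytes* nodes above come from
      // the JDK reverseBytes intrinsics, e.g.
      //
      //   int   ri = Integer.reverseBytes(i);    // ReverseBytesI
      //   long  rl = Long.reverseBytes(l);       // ReverseBytesL
      //   char  rc = Character.reverseBytes(c);  // ReverseBytesUS
      //   short rs = Short.reverseBytes(s);      // ReverseBytesS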
 8576 
 8577 //---------- Zeros Count Instructions ------------------------------------------
 8578 
 8579 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8580   predicate(UseCountLeadingZerosInstruction);
 8581   match(Set dst (CountLeadingZerosI src));
 8582   effect(KILL cr);
 8583 
 8584   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8585   ins_encode %{
 8586     __ lzcntl($dst$$Register, $src$$Register);
 8587   %}
 8588   ins_pipe(ialu_reg);
 8589 %}
 8590 
 8591 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8592   predicate(UseCountLeadingZerosInstruction);
 8593   match(Set dst (CountLeadingZerosI (LoadI src)));
 8594   effect(KILL cr);
 8595   ins_cost(175);
 8596   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8597   ins_encode %{
 8598     __ lzcntl($dst$$Register, $src$$Address);
 8599   %}
 8600   ins_pipe(ialu_reg_mem);
 8601 %}
 8602 
 8603 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8604   predicate(!UseCountLeadingZerosInstruction);
 8605   match(Set dst (CountLeadingZerosI src));
 8606   effect(KILL cr);
 8607 
 8608   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8609             "jnz     skip\n\t"
 8610             "movl    $dst, -1\n"
 8611       "skip:\n\t"
 8612             "negl    $dst\n\t"
 8613             "addl    $dst, 31" %}
 8614   ins_encode %{
 8615     Register Rdst = $dst$$Register;
 8616     Register Rsrc = $src$$Register;
 8617     Label skip;
 8618     __ bsrl(Rdst, Rsrc);
 8619     __ jccb(Assembler::notZero, skip);
 8620     __ movl(Rdst, -1);
 8621     __ bind(skip);
 8622     __ negl(Rdst);
 8623     __ addl(Rdst, BitsPerInt - 1);
 8624   %}
 8625   ins_pipe(ialu_reg);
 8626 %}
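
      // Worked example for the BSR fallback above: when $src is zero, BSR leaves
      // the destination undefined, so it is forced to -1; then -(-1) + 31 = 32,
      // the defined result of numberOfLeadingZeros(0). Otherwise dst = bsr(src)
      // = 31 - lzcnt(src), and -(31 - lzcnt) + 31 = lzcnt, as required.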
 8627 
 8628 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8629   predicate(UseCountLeadingZerosInstruction);
 8630   match(Set dst (CountLeadingZerosL src));
 8631   effect(KILL cr);
 8632 
 8633   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8634   ins_encode %{
 8635     __ lzcntq($dst$$Register, $src$$Register);
 8636   %}
 8637   ins_pipe(ialu_reg);
 8638 %}
 8639 
 8640 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8641   predicate(UseCountLeadingZerosInstruction);
 8642   match(Set dst (CountLeadingZerosL (LoadL src)));
 8643   effect(KILL cr);
 8644   ins_cost(175);
 8645   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8646   ins_encode %{
 8647     __ lzcntq($dst$$Register, $src$$Address);
 8648   %}
 8649   ins_pipe(ialu_reg_mem);
 8650 %}
 8651 
 8652 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8653   predicate(!UseCountLeadingZerosInstruction);
 8654   match(Set dst (CountLeadingZerosL src));
 8655   effect(KILL cr);
 8656 
 8657   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8658             "jnz     skip\n\t"
 8659             "movl    $dst, -1\n"
 8660       "skip:\n\t"
 8661             "negl    $dst\n\t"
 8662             "addl    $dst, 63" %}
 8663   ins_encode %{
 8664     Register Rdst = $dst$$Register;
 8665     Register Rsrc = $src$$Register;
 8666     Label skip;
 8667     __ bsrq(Rdst, Rsrc);
 8668     __ jccb(Assembler::notZero, skip);
 8669     __ movl(Rdst, -1);
 8670     __ bind(skip);
 8671     __ negl(Rdst);
 8672     __ addl(Rdst, BitsPerLong - 1);
 8673   %}
 8674   ins_pipe(ialu_reg);
 8675 %}
 8676 
 8677 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8678   predicate(UseCountTrailingZerosInstruction);
 8679   match(Set dst (CountTrailingZerosI src));
 8680   effect(KILL cr);
 8681 
 8682   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8683   ins_encode %{
 8684     __ tzcntl($dst$$Register, $src$$Register);
 8685   %}
 8686   ins_pipe(ialu_reg);
 8687 %}
 8688 
 8689 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8690   predicate(UseCountTrailingZerosInstruction);
 8691   match(Set dst (CountTrailingZerosI (LoadI src)));
 8692   effect(KILL cr);
 8693   ins_cost(175);
 8694   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8695   ins_encode %{
 8696     __ tzcntl($dst$$Register, $src$$Address);
 8697   %}
 8698   ins_pipe(ialu_reg_mem);
 8699 %}
 8700 
 8701 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8702   predicate(!UseCountTrailingZerosInstruction);
 8703   match(Set dst (CountTrailingZerosI src));
 8704   effect(KILL cr);
 8705 
 8706   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8707             "jnz     done\n\t"
 8708             "movl    $dst, 32\n"
 8709       "done:" %}
 8710   ins_encode %{
 8711     Register Rdst = $dst$$Register;
 8712     Label done;
 8713     __ bsfl(Rdst, $src$$Register);
 8714     __ jccb(Assembler::notZero, done);
 8715     __ movl(Rdst, BitsPerInt);
 8716     __ bind(done);
 8717   %}
 8718   ins_pipe(ialu_reg);
 8719 %}
 8720 
 8721 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8722   predicate(UseCountTrailingZerosInstruction);
 8723   match(Set dst (CountTrailingZerosL src));
 8724   effect(KILL cr);
 8725 
 8726   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8727   ins_encode %{
 8728     __ tzcntq($dst$$Register, $src$$Register);
 8729   %}
 8730   ins_pipe(ialu_reg);
 8731 %}
 8732 
 8733 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8734   predicate(UseCountTrailingZerosInstruction);
 8735   match(Set dst (CountTrailingZerosL (LoadL src)));
 8736   effect(KILL cr);
 8737   ins_cost(175);
 8738   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8739   ins_encode %{
 8740     __ tzcntq($dst$$Register, $src$$Address);
 8741   %}
 8742   ins_pipe(ialu_reg_mem);
 8743 %}
 8744 
 8745 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8746   predicate(!UseCountTrailingZerosInstruction);
 8747   match(Set dst (CountTrailingZerosL src));
 8748   effect(KILL cr);
 8749 
 8750   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8751             "jnz     done\n\t"
 8752             "movl    $dst, 64\n"
 8753       "done:" %}
 8754   ins_encode %{
 8755     Register Rdst = $dst$$Register;
 8756     Label done;
 8757     __ bsfq(Rdst, $src$$Register);
 8758     __ jccb(Assembler::notZero, done);
 8759     __ movl(Rdst, BitsPerLong);
 8760     __ bind(done);
 8761   %}
 8762   ins_pipe(ialu_reg);
 8763 %}
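
      // For illustration only (a sketch): these nodes come from the JDK
      // numberOfLeadingZeros/numberOfTrailingZeros intrinsics, e.g.
      //
      //   int lz = Integer.numberOfLeadingZeros(x);   // CountLeadingZerosI
      //   int tz = Long.numberOfTrailingZeros(y);     // CountTrailingZerosL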
 8764 
 8765 //--------------- Reverse Operation Instructions ----------------
 8766 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8767   predicate(!VM_Version::supports_gfni());
 8768   match(Set dst (ReverseI src));
 8769   effect(TEMP dst, TEMP rtmp, KILL cr);
 8770   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8771   ins_encode %{
 8772     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8773   %}
 8774   ins_pipe( ialu_reg );
 8775 %}
 8776 
 8777 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8778   predicate(VM_Version::supports_gfni());
 8779   match(Set dst (ReverseI src));
 8780   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8781   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8782   ins_encode %{
 8783     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8784   %}
 8785   ins_pipe( ialu_reg );
 8786 %}
 8787 
 8788 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8789   predicate(!VM_Version::supports_gfni());
 8790   match(Set dst (ReverseL src));
 8791   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8792   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8793   ins_encode %{
 8794     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8795   %}
 8796   ins_pipe( ialu_reg );
 8797 %}
 8798 
 8799 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8800   predicate(VM_Version::supports_gfni());
 8801   match(Set dst (ReverseL src));
 8802   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8803   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8804   ins_encode %{
 8805     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8806   %}
 8807   ins_pipe( ialu_reg );
 8808 %}
 8809 
 8810 //---------- Population Count Instructions -------------------------------------
 8811 
 8812 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8813   predicate(UsePopCountInstruction);
 8814   match(Set dst (PopCountI src));
 8815   effect(KILL cr);
 8816 
 8817   format %{ "popcnt  $dst, $src" %}
 8818   ins_encode %{
 8819     __ popcntl($dst$$Register, $src$$Register);
 8820   %}
 8821   ins_pipe(ialu_reg);
 8822 %}
 8823 
 8824 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8825   predicate(UsePopCountInstruction);
 8826   match(Set dst (PopCountI (LoadI mem)));
 8827   effect(KILL cr);
 8828 
 8829   format %{ "popcnt  $dst, $mem" %}
 8830   ins_encode %{
 8831     __ popcntl($dst$$Register, $mem$$Address);
 8832   %}
 8833   ins_pipe(ialu_reg);
 8834 %}
 8835 
 8836 // Note: Long.bitCount(long) returns an int.
 8837 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8838   predicate(UsePopCountInstruction);
 8839   match(Set dst (PopCountL src));
 8840   effect(KILL cr);
 8841 
 8842   format %{ "popcnt  $dst, $src" %}
 8843   ins_encode %{
 8844     __ popcntq($dst$$Register, $src$$Register);
 8845   %}
 8846   ins_pipe(ialu_reg);
 8847 %}
 8848 
 8849 // Note: Long.bitCount(long) returns an int.
 8850 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8851   predicate(UsePopCountInstruction);
 8852   match(Set dst (PopCountL (LoadL mem)));
 8853   effect(KILL cr);
 8854 
 8855   format %{ "popcnt  $dst, $mem" %}
 8856   ins_encode %{
 8857     __ popcntq($dst$$Register, $mem$$Address);
 8858   %}
 8859   ins_pipe(ialu_reg);
 8860 %}
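
      // For illustration only (a sketch): Integer.bitCount and Long.bitCount
      // intrinsify to PopCountI/PopCountL when UsePopCountInstruction is set, e.g.
      //
      //   int ones = Long.bitCount(y);   // PopCountL -> popcnt, int result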
 8861 
 8862 
 8863 //----------MemBar Instructions-----------------------------------------------
 8864 // Memory barrier flavors
 8865 
 8866 instruct membar_acquire()
 8867 %{
 8868   match(MemBarAcquire);
 8869   match(LoadFence);
 8870   ins_cost(0);
 8871 
 8872   size(0);
 8873   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8874   ins_encode();
 8875   ins_pipe(empty);
 8876 %}
 8877 
 8878 instruct membar_acquire_lock()
 8879 %{
 8880   match(MemBarAcquireLock);
 8881   ins_cost(0);
 8882 
 8883   size(0);
 8884   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8885   ins_encode();
 8886   ins_pipe(empty);
 8887 %}
 8888 
 8889 instruct membar_release()
 8890 %{
 8891   match(MemBarRelease);
 8892   match(StoreFence);
 8893   ins_cost(0);
 8894 
 8895   size(0);
 8896   format %{ "MEMBAR-release ! (empty encoding)" %}
 8897   ins_encode();
 8898   ins_pipe(empty);
 8899 %}
 8900 
 8901 instruct membar_release_lock()
 8902 %{
 8903   match(MemBarReleaseLock);
 8904   ins_cost(0);
 8905 
 8906   size(0);
 8907   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8908   ins_encode();
 8909   ins_pipe(empty);
 8910 %}
 8911 
 8912 instruct membar_storeload(rFlagsReg cr) %{
 8913   match(MemBarStoreLoad);
 8914   effect(KILL cr);
 8915   ins_cost(400);
 8916 
 8917   format %{
 8918     $$template
 8919     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8920   %}
 8921   ins_encode %{
 8922     __ membar(Assembler::StoreLoad);
 8923   %}
 8924   ins_pipe(pipe_slow);
 8925 %}
 8926 
 8927 instruct membar_volatile(rFlagsReg cr) %{
 8928   match(MemBarVolatile);
 8929   effect(KILL cr);
 8930   ins_cost(400);
 8931 
 8932   format %{
 8933     $$template
 8934     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8935   %}
 8936   ins_encode %{
 8937     __ membar(Assembler::StoreLoad);
 8938   %}
 8939   ins_pipe(pipe_slow);
 8940 %}
 8941 
 8942 instruct unnecessary_membar_volatile()
 8943 %{
 8944   match(MemBarVolatile);
 8945   predicate(Matcher::post_store_load_barrier(n));
 8946   ins_cost(0);
 8947 
 8948   size(0);
 8949   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8950   ins_encode();
 8951   ins_pipe(empty);
 8952 %}
 8953 
 8954 instruct membar_full(rFlagsReg cr) %{
 8955   match(MemBarFull);
 8956   effect(KILL cr);
 8957   ins_cost(400);
 8958 
 8959   format %{
 8960     $$template
 8961     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8962   %}
 8963   ins_encode %{
 8964     __ membar(Assembler::StoreLoad);
 8965   %}
 8966   ins_pipe(pipe_slow);
 8967 %}
 8968 
 8969 instruct membar_storestore() %{
 8970   match(MemBarStoreStore);
 8971   match(StoreStoreFence);
 8972   ins_cost(0);
 8973 
 8974   size(0);
 8975   format %{ "MEMBAR-storestore (empty encoding)" %}
 8976   ins_encode( );
 8977   ins_pipe(empty);
 8978 %}
 8979 
 8980 //----------Move Instructions--------------------------------------------------
 8981 
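// The cast instructions below only change the compiler's view of a value;
// the move is elided whenever the register allocator assigns $dst and $src
// to the same register.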
 8982 instruct castX2P(rRegP dst, rRegL src)
 8983 %{
 8984   match(Set dst (CastX2P src));
 8985 
 8986   format %{ "movq    $dst, $src\t# long->ptr" %}
 8987   ins_encode %{
 8988     if ($dst$$reg != $src$$reg) {
 8989       __ movptr($dst$$Register, $src$$Register);
 8990     }
 8991   %}
 8992   ins_pipe(ialu_reg_reg); // XXX
 8993 %}
 8994 
 8995 instruct castI2N(rRegN dst, rRegI src)
 8996 %{
 8997   match(Set dst (CastI2N src));
 8998 
 8999   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 9000   ins_encode %{
 9001     if ($dst$$reg != $src$$reg) {
 9002       __ movl($dst$$Register, $src$$Register);
 9003     }
 9004   %}
 9005   ins_pipe(ialu_reg_reg); // XXX
 9006 %}
 9007 
 9008 instruct castN2X(rRegL dst, rRegN src)
 9009 %{
 9010   match(Set dst (CastP2X src));
 9011 
 9012   format %{ "movq    $dst, $src\t# narrow ptr -> long" %}
 9013   ins_encode %{
 9014     if ($dst$$reg != $src$$reg) {
 9015       __ movptr($dst$$Register, $src$$Register);
 9016     }
 9017   %}
 9018   ins_pipe(ialu_reg_reg); // XXX
 9019 %}
 9020 
 9021 instruct castP2X(rRegL dst, rRegP src)
 9022 %{
 9023   match(Set dst (CastP2X src));
 9024 
 9025   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9026   ins_encode %{
 9027     if ($dst$$reg != $src$$reg) {
 9028       __ movptr($dst$$Register, $src$$Register);
 9029     }
 9030   %}
 9031   ins_pipe(ialu_reg_reg); // XXX
 9032 %}
 9033 
 9034 // Convert oop into int for vector alignment masking
 9035 instruct convP2I(rRegI dst, rRegP src)
 9036 %{
 9037   match(Set dst (ConvL2I (CastP2X src)));
 9038 
 9039   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9040   ins_encode %{
 9041     __ movl($dst$$Register, $src$$Register);
 9042   %}
 9043   ins_pipe(ialu_reg_reg); // XXX
 9044 %}
 9045 
 9046 // Convert compressed oop into int for vector alignment masking
 9047 // in case of 32-bit oops (heap < 4GB).
 9048 instruct convN2I(rRegI dst, rRegN src)
 9049 %{
 9050   predicate(CompressedOops::shift() == 0);
 9051   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 9052 
 9053   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 9054   ins_encode %{
 9055     __ movl($dst$$Register, $src$$Register);
 9056   %}
 9057   ins_pipe(ialu_reg_reg); // XXX
 9058 %}
 9059 
 9060 // Convert oop pointer into compressed form
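// A narrow oop is the full oop with the heap base subtracted and the result
// shifted right by CompressedOops::shift(); both base and shift can be zero
// when the heap is small enough and placed low in the address space.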
 9061 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 9062   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 9063   match(Set dst (EncodeP src));
 9064   effect(KILL cr);
 9065   format %{ "encode_heap_oop $dst,$src" %}
 9066   ins_encode %{
 9067     Register s = $src$$Register;
 9068     Register d = $dst$$Register;
 9069     if (s != d) {
 9070       __ movq(d, s);
 9071     }
 9072     __ encode_heap_oop(d);
 9073   %}
 9074   ins_pipe(ialu_reg_long);
 9075 %}
 9076 
 9077 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9078   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 9079   match(Set dst (EncodeP src));
 9080   effect(KILL cr);
 9081   format %{ "encode_heap_oop_not_null $dst,$src" %}
 9082   ins_encode %{
 9083     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 9084   %}
 9085   ins_pipe(ialu_reg_long);
 9086 %}
 9087 
 9088 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 9089   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9090             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9091   match(Set dst (DecodeN src));
 9092   effect(KILL cr);
 9093   format %{ "decode_heap_oop $dst,$src" %}
 9094   ins_encode %{
 9095     Register s = $src$$Register;
 9096     Register d = $dst$$Register;
 9097     if (s != d) {
 9098       __ movq(d, s);
 9099     }
 9100     __ decode_heap_oop(d);
 9101   %}
 9102   ins_pipe(ialu_reg_long);
 9103 %}
 9104 
 9105 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9106   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9107             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9108   match(Set dst (DecodeN src));
 9109   effect(KILL cr);
 9110   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9111   ins_encode %{
 9112     Register s = $src$$Register;
 9113     Register d = $dst$$Register;
 9114     if (s != d) {
 9115       __ decode_heap_oop_not_null(d, s);
 9116     } else {
 9117       __ decode_heap_oop_not_null(d);
 9118     }
 9119   %}
 9120   ins_pipe(ialu_reg_long);
 9121 %}
 9122 
 9123 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9124   match(Set dst (EncodePKlass src));
 9125   effect(TEMP dst, KILL cr);
 9126   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9127   ins_encode %{
 9128     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9129   %}
 9130   ins_pipe(ialu_reg_long);
 9131 %}
 9132 
 9133 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9134   match(Set dst (DecodeNKlass src));
 9135   effect(TEMP dst, KILL cr);
 9136   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9137   ins_encode %{
 9138     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9139   %}
 9140   ins_pipe(ialu_reg_long);
 9141 %}
 9142 
 9143 //----------Conditional Move---------------------------------------------------
 9144 // Jump
 9145 // dummy instruction for generating temp registers
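// These match the Jump node used for jump-table (tableswitch) dispatch:
// $constantaddress is this method's constant table, where the jump table is
// laid out, and the indirect jmp indexes into it with the switch value.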
 9146 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9147   match(Jump (LShiftL switch_val shift));
 9148   ins_cost(350);
 9149   predicate(false);
 9150   effect(TEMP dest);
 9151 
 9152   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9153             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9154   ins_encode %{
 9155     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9156     // to do that and the compiler is using that register as one it can allocate.
 9157     // So we build it all by hand.
 9158     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9159     // ArrayAddress dispatch(table, index);
 9160     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9161     __ lea($dest$$Register, $constantaddress);
 9162     __ jmp(dispatch);
 9163   %}
 9164   ins_pipe(pipe_jmp);
 9165 %}
 9166 
 9167 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9168   match(Jump (AddL (LShiftL switch_val shift) offset));
 9169   ins_cost(350);
 9170   effect(TEMP dest);
 9171 
 9172   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9173             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9174   ins_encode %{
 9175     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9176     // to do that and the compiler is using that register as one it can allocate.
 9177     // So we build it all by hand.
 9178     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9179     // ArrayAddress dispatch(table, index);
 9180     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9181     __ lea($dest$$Register, $constantaddress);
 9182     __ jmp(dispatch);
 9183   %}
 9184   ins_pipe(pipe_jmp);
 9185 %}
 9186 
 9187 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9188   match(Jump switch_val);
 9189   ins_cost(350);
 9190   effect(TEMP dest);
 9191 
 9192   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9193             "jmp     [$dest + $switch_val]\n\t" %}
 9194   ins_encode %{
 9195     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9196     // to do that and the compiler is using that register as one it can allocate.
 9197     // So we build it all by hand.
 9198     // Address index(noreg, switch_reg, Address::times_1);
 9199     // ArrayAddress dispatch(table, index);
 9200     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9201     __ lea($dest$$Register, $constantaddress);
 9202     __ jmp(dispatch);
 9203   %}
 9204   ins_pipe(pipe_jmp);
 9205 %}
 9206 
 9207 // Conditional move
 9208 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9209 %{
 9210   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9211   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9212 
 9213   ins_cost(100); // XXX
 9214   format %{ "setbn$cop $dst\t# signed, int" %}
 9215   ins_encode %{
 9216     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9217     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9218   %}
 9219   ins_pipe(ialu_reg);
 9220 %}
 9221 
 9222 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9223 %{
 9224   predicate(!UseAPX);
 9225   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9226 
 9227   ins_cost(200); // XXX
 9228   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9229   ins_encode %{
 9230     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9231   %}
 9232   ins_pipe(pipe_cmov_reg);
 9233 %}
 9234 
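// The "_ndd" variants use the Intel APX new-data-destination (NDD) encodings,
// which provide a three-operand form so the result does not have to
// overwrite one of the sources.  They are selected only when UseAPX is set.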
 9235 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9236 %{
 9237   predicate(UseAPX);
 9238   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9239 
 9240   ins_cost(200);
 9241   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9242   ins_encode %{
 9243     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9244   %}
 9245   ins_pipe(pipe_cmov_reg);
 9246 %}
 9247 
 9248 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9249 %{
 9250   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9251   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9252 
 9253   ins_cost(100); // XXX
 9254   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9255   ins_encode %{
 9256     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9257     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9258   %}
 9259   ins_pipe(ialu_reg);
 9260 %}
 9261 
 9262 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9263   predicate(!UseAPX);
 9264   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9265 
 9266   ins_cost(200); // XXX
 9267   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9268   ins_encode %{
 9269     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9270   %}
 9271   ins_pipe(pipe_cmov_reg);
 9272 %}
 9273 
 9274 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9275   predicate(UseAPX);
 9276   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9277 
 9278   ins_cost(200);
 9279   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9280   ins_encode %{
 9281     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9282   %}
 9283   ins_pipe(pipe_cmov_reg);
 9284 %}
 9285 
 9286 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9287 %{
 9288   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9289   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9290 
 9291   ins_cost(100); // XXX
 9292   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9293   ins_encode %{
 9294     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9295     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9296   %}
 9297   ins_pipe(ialu_reg);
 9298 %}
 9299 
 9300 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9301 %{
 9302   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9303   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9304 
 9305   ins_cost(100); // XXX
 9306   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9307   ins_encode %{
 9308     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9309     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9310   %}
 9311   ins_pipe(ialu_reg);
 9312 %}
 9313 
 9314 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9315   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9316 
 9317   ins_cost(200);
 9318   expand %{
 9319     cmovI_regU(cop, cr, dst, src);
 9320   %}
 9321 %}
 9322 
 9323 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9324   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9325 
 9326   ins_cost(200);
 9327   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9328   ins_encode %{
 9329     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9330   %}
 9331   ins_pipe(pipe_cmov_reg);
 9332 %}
 9333 
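// The UCF2 forms handle eq/ne tests on the result of an unordered float
// compare: ucomiss/ucomisd set ZF = PF = CF = 1 when either operand is NaN,
// so a lone cmovne would treat NaN as equal.  The extra cmovp also moves the
// source on the unordered (parity) case, giving NaN the "not equal" result
// Java requires.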
 9334 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9335   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9336   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9337 
 9338   ins_cost(200); // XXX
 9339   format %{ "cmovpl  $dst, $src\n\t"
 9340             "cmovnel $dst, $src" %}
 9341   ins_encode %{
 9342     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9343     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9344   %}
 9345   ins_pipe(pipe_cmov_reg);
 9346 %}
 9347 
 9348 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9349 // inputs of the CMove
 9350 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9351   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9352   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9353   effect(TEMP dst);
 9354 
 9355   ins_cost(200); // XXX
 9356   format %{ "cmovpl  $dst, $src\n\t"
 9357             "cmovnel $dst, $src" %}
 9358   ins_encode %{
 9359     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9360     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9361   %}
 9362   ins_pipe(pipe_cmov_reg);
 9363 %}
 9364 
 9365 // Conditional move
 9366 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9367   predicate(!UseAPX);
 9368   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9369 
 9370   ins_cost(250); // XXX
 9371   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9372   ins_encode %{
 9373     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9374   %}
 9375   ins_pipe(pipe_cmov_mem);
 9376 %}
 9377 
 9378 // Conditional move
 9379 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9380 %{
 9381   predicate(UseAPX);
 9382   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9383 
 9384   ins_cost(250);
 9385   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9386   ins_encode %{
 9387     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9388   %}
 9389   ins_pipe(pipe_cmov_mem);
 9390 %}
 9391 
 9392 // Conditional move
 9393 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9394 %{
 9395   predicate(!UseAPX);
 9396   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9397 
 9398   ins_cost(250); // XXX
 9399   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9400   ins_encode %{
 9401     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9402   %}
 9403   ins_pipe(pipe_cmov_mem);
 9404 %}
 9405 
 9406 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9407   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9408 
 9409   ins_cost(250);
 9410   expand %{
 9411     cmovI_memU(cop, cr, dst, src);
 9412   %}
 9413 %}
 9414 
 9415 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9416 %{
 9417   predicate(UseAPX);
 9418   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9419 
 9420   ins_cost(250);
 9421   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9422   ins_encode %{
 9423     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9424   %}
 9425   ins_pipe(pipe_cmov_mem);
 9426 %}
 9427 
 9428 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9429 %{
 9430   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9431 
 9432   ins_cost(250);
 9433   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9434   ins_encode %{
 9435     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9436   %}
 9437   ins_pipe(pipe_cmov_mem);
 9438 %}
 9439 
 9440 // Conditional move
 9441 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9442 %{
 9443   predicate(!UseAPX);
 9444   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9445 
 9446   ins_cost(200); // XXX
 9447   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9448   ins_encode %{
 9449     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9450   %}
 9451   ins_pipe(pipe_cmov_reg);
 9452 %}
 9453 
 9454 // Conditional move ndd
 9455 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9456 %{
 9457   predicate(UseAPX);
 9458   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9459 
 9460   ins_cost(200);
 9461   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9462   ins_encode %{
 9463     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9464   %}
 9465   ins_pipe(pipe_cmov_reg);
 9466 %}
 9467 
 9468 // Conditional move
 9469 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9470 %{
 9471   predicate(!UseAPX);
 9472   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9473 
 9474   ins_cost(200); // XXX
 9475   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9476   ins_encode %{
 9477     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9478   %}
 9479   ins_pipe(pipe_cmov_reg);
 9480 %}
 9481 
 9482 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9483   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9484 
 9485   ins_cost(200);
 9486   expand %{
 9487     cmovN_regU(cop, cr, dst, src);
 9488   %}
 9489 %}
 9490 
 9491 // Conditional move ndd
 9492 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9493 %{
 9494   predicate(UseAPX);
 9495   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9496 
 9497   ins_cost(200);
 9498   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9499   ins_encode %{
 9500     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9501   %}
 9502   ins_pipe(pipe_cmov_reg);
 9503 %}
 9504 
 9505 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9506   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9507 
 9508   ins_cost(200);
 9509   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9510   ins_encode %{
 9511     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9512   %}
 9513   ins_pipe(pipe_cmov_reg);
 9514 %}
 9515 
 9516 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9517   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9518   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9519 
 9520   ins_cost(200); // XXX
 9521   format %{ "cmovpl  $dst, $src\n\t"
 9522             "cmovnel $dst, $src" %}
 9523   ins_encode %{
 9524     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9525     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9526   %}
 9527   ins_pipe(pipe_cmov_reg);
 9528 %}
 9529 
 9530 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9531 // inputs of the CMove
 9532 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9533   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9534   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9535 
 9536   ins_cost(200); // XXX
 9537   format %{ "cmovpl  $dst, $src\n\t"
 9538             "cmovnel $dst, $src" %}
 9539   ins_encode %{
 9540     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9541     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9542   %}
 9543   ins_pipe(pipe_cmov_reg);
 9544 %}
 9545 
 9546 // Conditional move
 9547 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9548 %{
 9549   predicate(!UseAPX);
 9550   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9551 
 9552   ins_cost(200); // XXX
 9553   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9554   ins_encode %{
 9555     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9556   %}
 9557   ins_pipe(pipe_cmov_reg);  // XXX
 9558 %}
 9559 
 9560 // Conditional move ndd
 9561 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9562 %{
 9563   predicate(UseAPX);
 9564   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9565 
 9566   ins_cost(200);
 9567   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9568   ins_encode %{
 9569     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9570   %}
 9571   ins_pipe(pipe_cmov_reg);
 9572 %}
 9573 
 9574 // Conditional move
 9575 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9576 %{
 9577   predicate(!UseAPX);
 9578   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9579 
 9580   ins_cost(200); // XXX
 9581   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9582   ins_encode %{
 9583     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9584   %}
 9585   ins_pipe(pipe_cmov_reg); // XXX
 9586 %}
 9587 
 9588 // Conditional move ndd
 9589 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9590 %{
 9591   predicate(UseAPX);
 9592   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9593 
 9594   ins_cost(200);
 9595   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9596   ins_encode %{
 9597     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9598   %}
 9599   ins_pipe(pipe_cmov_reg);
 9600 %}
 9601 
 9602 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9603   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9604 
 9605   ins_cost(200);
 9606   expand %{
 9607     cmovP_regU(cop, cr, dst, src);
 9608   %}
 9609 %}
 9610 
 9611 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9612   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9613 
 9614   ins_cost(200);
 9615   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9616   ins_encode %{
 9617     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9618   %}
 9619   ins_pipe(pipe_cmov_reg);
 9620 %}
 9621 
 9622 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9623   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9624   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9625 
 9626   ins_cost(200); // XXX
 9627   format %{ "cmovpq  $dst, $src\n\t"
 9628             "cmovneq $dst, $src" %}
 9629   ins_encode %{
 9630     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9631     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9632   %}
 9633   ins_pipe(pipe_cmov_reg);
 9634 %}
 9635 
 9636 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9637 // inputs of the CMove
 9638 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9639   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9640   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9641 
 9642   ins_cost(200); // XXX
 9643   format %{ "cmovpq  $dst, $src\n\t"
 9644             "cmovneq $dst, $src" %}
 9645   ins_encode %{
 9646     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9647     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9648   %}
 9649   ins_pipe(pipe_cmov_reg);
 9650 %}
 9651 
 9652 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9653 %{
 9654   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9655   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9656 
 9657   ins_cost(100); // XXX
 9658   format %{ "setbn$cop $dst\t# signed, long" %}
 9659   ins_encode %{
 9660     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9661     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9662   %}
 9663   ins_pipe(ialu_reg);
 9664 %}
 9665 
 9666 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9667 %{
 9668   predicate(!UseAPX);
 9669   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9670 
 9671   ins_cost(200); // XXX
 9672   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9673   ins_encode %{
 9674     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9675   %}
 9676   ins_pipe(pipe_cmov_reg);  // XXX
 9677 %}
 9678 
 9679 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9680 %{
 9681   predicate(UseAPX);
 9682   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9683 
 9684   ins_cost(200);
 9685   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9686   ins_encode %{
 9687     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9688   %}
 9689   ins_pipe(pipe_cmov_reg);
 9690 %}
 9691 
 9692 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9693 %{
 9694   predicate(!UseAPX);
 9695   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9696 
 9697   ins_cost(200); // XXX
 9698   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9699   ins_encode %{
 9700     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9701   %}
 9702   ins_pipe(pipe_cmov_mem);  // XXX
 9703 %}
 9704 
 9705 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9706 %{
 9707   predicate(UseAPX);
 9708   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9709 
 9710   ins_cost(200);
 9711   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9712   ins_encode %{
 9713     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9714   %}
 9715   ins_pipe(pipe_cmov_mem);
 9716 %}
 9717 
 9718 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9719 %{
 9720   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9721   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9722 
 9723   ins_cost(100); // XXX
 9724   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9725   ins_encode %{
 9726     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9727     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9728   %}
 9729   ins_pipe(ialu_reg);
 9730 %}
 9731 
 9732 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9733 %{
 9734   predicate(!UseAPX);
 9735   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9736 
 9737   ins_cost(200); // XXX
 9738   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9739   ins_encode %{
 9740     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9741   %}
 9742   ins_pipe(pipe_cmov_reg); // XXX
 9743 %}
 9744 
 9745 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9746 %{
 9747   predicate(UseAPX);
 9748   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9749 
 9750   ins_cost(200);
 9751   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9752   ins_encode %{
 9753     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9754   %}
 9755   ins_pipe(pipe_cmov_reg);
 9756 %}
 9757 
 9758 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9759 %{
 9760   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9761   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9762 
 9763   ins_cost(100); // XXX
 9764   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9765   ins_encode %{
 9766     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9767     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9768   %}
 9769   ins_pipe(ialu_reg);
 9770 %}
 9771 
 9772 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9773 %{
 9774   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9775   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9776 
 9777   ins_cost(100); // XXX
 9778   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9779   ins_encode %{
 9780     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9781     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9782   %}
 9783   ins_pipe(ialu_reg);
 9784 %}
 9785 
 9786 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9787   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9788 
 9789   ins_cost(200);
 9790   expand %{
 9791     cmovL_regU(cop, cr, dst, src);
 9792   %}
 9793 %}
 9794 
 9795 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9796 %{
 9797   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9798 
 9799   ins_cost(200);
 9800   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9801   ins_encode %{
 9802     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9803   %}
 9804   ins_pipe(pipe_cmov_reg);
 9805 %}
 9806 
 9807 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9808   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9809   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9810 
 9811   ins_cost(200); // XXX
 9812   format %{ "cmovpq  $dst, $src\n\t"
 9813             "cmovneq $dst, $src" %}
 9814   ins_encode %{
 9815     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9816     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9817   %}
 9818   ins_pipe(pipe_cmov_reg);
 9819 %}
 9820 
 9821 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9822 // inputs of the CMove
 9823 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9824   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9825   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9826 
 9827   ins_cost(200); // XXX
 9828   format %{ "cmovpq  $dst, $src\n\t"
 9829             "cmovneq $dst, $src" %}
 9830   ins_encode %{
 9831     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9832     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9833   %}
 9834   ins_pipe(pipe_cmov_reg);
 9835 %}
 9836 
 9837 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9838 %{
 9839   predicate(!UseAPX);
 9840   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9841 
 9842   ins_cost(200); // XXX
 9843   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9844   ins_encode %{
 9845     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9846   %}
 9847   ins_pipe(pipe_cmov_mem); // XXX
 9848 %}
 9849 
 9850 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9851   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9852 
 9853   ins_cost(200);
 9854   expand %{
 9855     cmovL_memU(cop, cr, dst, src);
 9856   %}
 9857 %}
 9858 
 9859 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9860 %{
 9861   predicate(UseAPX);
 9862   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9863 
 9864   ins_cost(200);
 9865   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9866   ins_encode %{
 9867     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9868   %}
 9869   ins_pipe(pipe_cmov_mem);
 9870 %}
 9871 
 9872 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9873 %{
 9874   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9875 
 9876   ins_cost(200);
 9877   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9878   ins_encode %{
 9879     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9880   %}
 9881   ins_pipe(pipe_cmov_mem);
 9882 %}
 9883 
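// There is no conditional move for XMM registers, so float and double CMoves
// are emitted as a short branch around a register-to-register move.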
 9884 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9885 %{
 9886   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9887 
 9888   ins_cost(200); // XXX
 9889   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9890             "movss     $dst, $src\n"
 9891     "skip:" %}
 9892   ins_encode %{
 9893     Label Lskip;
 9894     // Invert sense of branch from sense of CMOV
 9895     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9896     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9897     __ bind(Lskip);
 9898   %}
 9899   ins_pipe(pipe_slow);
 9900 %}
 9901 
 9902 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9903 %{
 9904   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9905 
 9906   ins_cost(200); // XXX
 9907   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9908             "movss     $dst, $src\n"
 9909     "skip:" %}
 9910   ins_encode %{
 9911     Label Lskip;
 9912     // Invert sense of branch from sense of CMOV
 9913     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9914     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9915     __ bind(Lskip);
 9916   %}
 9917   ins_pipe(pipe_slow);
 9918 %}
 9919 
 9920 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9921   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9922 
 9923   ins_cost(200);
 9924   expand %{
 9925     cmovF_regU(cop, cr, dst, src);
 9926   %}
 9927 %}
 9928 
 9929 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9930 %{
 9931   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9932 
 9933   ins_cost(200); // XXX
 9934   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9935             "movss     $dst, $src\n"
 9936     "skip:" %}
 9937   ins_encode %{
 9938     Label Lskip;
 9939     // Invert sense of branch from sense of CMOV
 9940     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9941     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9942     __ bind(Lskip);
 9943   %}
 9944   ins_pipe(pipe_slow);
 9945 %}
 9946 
 9947 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9948 %{
 9949   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9950 
 9951   ins_cost(200); // XXX
 9952   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9953             "movsd     $dst, $src\n"
 9954     "skip:" %}
 9955   ins_encode %{
 9956     Label Lskip;
 9957     // Invert sense of branch from sense of CMOV
 9958     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9959     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9960     __ bind(Lskip);
 9961   %}
 9962   ins_pipe(pipe_slow);
 9963 %}
 9964 
 9965 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9966 %{
 9967   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9968 
 9969   ins_cost(200); // XXX
 9970   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9971             "movsd     $dst, $src\n"
 9972     "skip:" %}
 9973   ins_encode %{
 9974     Label Lskip;
 9975     // Invert sense of branch from sense of CMOV
 9976     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9977     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9978     __ bind(Lskip);
 9979   %}
 9980   ins_pipe(pipe_slow);
 9981 %}
 9982 
 9983 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9984   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9985 
 9986   ins_cost(200);
 9987   expand %{
 9988     cmovD_regU(cop, cr, dst, src);
 9989   %}
 9990 %}
 9991 
 9992 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9993 %{
 9994   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9995 
 9996   ins_cost(200); // XXX
 9997   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9998             "movsd     $dst, $src\n"
 9999     "skip:" %}
10000   ins_encode %{
10001     Label Lskip;
10002     // Invert sense of branch from sense of CMOV
10003     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
10004     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
10005     __ bind(Lskip);
10006   %}
10007   ins_pipe(pipe_slow);
10008 %}
10009 
10010 //----------Arithmetic Instructions--------------------------------------------
10011 //----------Addition Instructions----------------------------------------------
10012 
10013 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10014 %{
10015   predicate(!UseAPX);
10016   match(Set dst (AddI dst src));
10017   effect(KILL cr);
10018   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10019   format %{ "addl    $dst, $src\t# int" %}
10020   ins_encode %{
10021     __ addl($dst$$Register, $src$$Register);
10022   %}
10023   ins_pipe(ialu_reg_reg);
10024 %}
10025 
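// Note: the trailing 'false' passed to the APX emitters below is their
// no_flags argument; passing false keeps the ordinary flag-updating encoding
// instead of the APX no-flags (NF) form.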
10026 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10027 %{
10028   predicate(UseAPX);
10029   match(Set dst (AddI src1 src2));
10030   effect(KILL cr);
10031   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10032 
10033   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10034   ins_encode %{
10035     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
10036   %}
10037   ins_pipe(ialu_reg_reg);
10038 %}
10039 
10040 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10041 %{
10042   predicate(!UseAPX);
10043   match(Set dst (AddI dst src));
10044   effect(KILL cr);
10045   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10046 
10047   format %{ "addl    $dst, $src\t# int" %}
10048   ins_encode %{
10049     __ addl($dst$$Register, $src$$constant);
10050   %}
10051   ins_pipe( ialu_reg );
10052 %}
10053 
10054 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10055 %{
10056   predicate(UseAPX);
10057   match(Set dst (AddI src1 src2));
10058   effect(KILL cr);
10059   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10060 
10061   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10062   ins_encode %{
10063     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
10064   %}
10065   ins_pipe( ialu_reg );
10066 %}
10067 
10068 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
10069 %{
10070   predicate(UseAPX);
10071   match(Set dst (AddI (LoadI src1) src2));
10072   effect(KILL cr);
10073   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10074 
10075   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10076   ins_encode %{
10077     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10078   %}
10079   ins_pipe( ialu_reg );
10080 %}
10081 
10082 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10083 %{
10084   predicate(!UseAPX);
10085   match(Set dst (AddI dst (LoadI src)));
10086   effect(KILL cr);
10087   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10088 
10089   ins_cost(150); // XXX
10090   format %{ "addl    $dst, $src\t# int" %}
10091   ins_encode %{
10092     __ addl($dst$$Register, $src$$Address);
10093   %}
10094   ins_pipe(ialu_reg_mem);
10095 %}
10096 
10097 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10098 %{
10099   predicate(UseAPX);
10100   match(Set dst (AddI src1 (LoadI src2)));
10101   effect(KILL cr);
10102   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10103 
10104   ins_cost(150);
10105   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10106   ins_encode %{
10107     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10108   %}
10109   ins_pipe(ialu_reg_mem);
10110 %}
10111 
10112 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10113 %{
10114   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10115   effect(KILL cr);
10116   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10117 
10118   ins_cost(150); // XXX
10119   format %{ "addl    $dst, $src\t# int" %}
10120   ins_encode %{
10121     __ addl($dst$$Address, $src$$Register);
10122   %}
10123   ins_pipe(ialu_mem_reg);
10124 %}
10125 
10126 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10127 %{
10128   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10129   effect(KILL cr);
10130   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10131 
10133   ins_cost(125); // XXX
10134   format %{ "addl    $dst, $src\t# int" %}
10135   ins_encode %{
10136     __ addl($dst$$Address, $src$$constant);
10137   %}
10138   ins_pipe(ialu_mem_imm);
10139 %}
10140 
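// INC and DEC update OF, SF, ZF, AF and PF but leave CF untouched; their use
// is gated by UseIncDec because the partial flag update can be slower than an
// equivalent add/sub on some processors.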
10141 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10142 %{
10143   predicate(!UseAPX && UseIncDec);
10144   match(Set dst (AddI dst src));
10145   effect(KILL cr);
10146 
10147   format %{ "incl    $dst\t# int" %}
10148   ins_encode %{
10149     __ incrementl($dst$$Register);
10150   %}
10151   ins_pipe(ialu_reg);
10152 %}
10153 
10154 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10155 %{
10156   predicate(UseAPX && UseIncDec);
10157   match(Set dst (AddI src val));
10158   effect(KILL cr);
10159   flag(PD::Flag_ndd_demotable_opr1);
10160 
10161   format %{ "eincl    $dst, $src\t# int ndd" %}
10162   ins_encode %{
10163     __ eincl($dst$$Register, $src$$Register, false);
10164   %}
10165   ins_pipe(ialu_reg);
10166 %}
10167 
10168 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10169 %{
10170   predicate(UseAPX && UseIncDec);
10171   match(Set dst (AddI (LoadI src) val));
10172   effect(KILL cr);
10173 
10174   format %{ "eincl    $dst, $src\t# int ndd" %}
10175   ins_encode %{
10176     __ eincl($dst$$Register, $src$$Address, false);
10177   %}
10178   ins_pipe(ialu_reg);
10179 %}
10180 
10181 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10182 %{
10183   predicate(UseIncDec);
10184   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10185   effect(KILL cr);
10186 
10187   ins_cost(125); // XXX
10188   format %{ "incl    $dst\t# int" %}
10189   ins_encode %{
10190     __ incrementl($dst$$Address);
10191   %}
10192   ins_pipe(ialu_mem_imm);
10193 %}
10194 
10195 // XXX why does that use AddI
10196 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10197 %{
10198   predicate(!UseAPX && UseIncDec);
10199   match(Set dst (AddI dst src));
10200   effect(KILL cr);
10201 
10202   format %{ "decl    $dst\t# int" %}
10203   ins_encode %{
10204     __ decrementl($dst$$Register);
10205   %}
10206   ins_pipe(ialu_reg);
10207 %}
10208 
10209 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10210 %{
10211   predicate(UseAPX && UseIncDec);
10212   match(Set dst (AddI src val));
10213   effect(KILL cr);
10214   flag(PD::Flag_ndd_demotable_opr1);
10215 
10216   format %{ "edecl    $dst, $src\t# int ndd" %}
10217   ins_encode %{
10218     __ edecl($dst$$Register, $src$$Register, false);
10219   %}
10220   ins_pipe(ialu_reg);
10221 %}
10222 
10223 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10224 %{
10225   predicate(UseAPX && UseIncDec);
10226   match(Set dst (AddI (LoadI src) val));
10227   effect(KILL cr);
10228 
10229   format %{ "edecl    $dst, $src\t# int ndd" %}
10230   ins_encode %{
10231     __ edecl($dst$$Register, $src$$Address, false);
10232   %}
10233   ins_pipe(ialu_reg);
10234 %}
10235 
10236 // XXX why does that use AddI
10237 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10238 %{
10239   predicate(UseIncDec);
10240   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10241   effect(KILL cr);
10242 
10243   ins_cost(125); // XXX
10244   format %{ "decl    $dst\t# int" %}
10245   ins_encode %{
10246     __ decrementl($dst$$Address);
10247   %}
10248   ins_pipe(ialu_mem_imm);
10249 %}
10250 
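// LEA folds the shift and additions into a single address calculation and
// does not modify the flags register, which is why these forms have no
// KILL cr effect.  They are used only where the 2- and 3-operand forms are
// known to be fast (see the supports_fast_*op_lea predicates).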
10251 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10252 %{
10253   predicate(VM_Version::supports_fast_2op_lea());
10254   match(Set dst (AddI (LShiftI index scale) disp));
10255 
10256   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10257   ins_encode %{
10258     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10259     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10260   %}
10261   ins_pipe(ialu_reg_reg);
10262 %}
10263 
10264 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10265 %{
10266   predicate(VM_Version::supports_fast_3op_lea());
10267   match(Set dst (AddI (AddI base index) disp));
10268 
10269   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10270   ins_encode %{
10271     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10272   %}
10273   ins_pipe(ialu_reg_reg);
10274 %}
10275 
10276 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10277 %{
10278   predicate(VM_Version::supports_fast_2op_lea());
10279   match(Set dst (AddI base (LShiftI index scale)));
10280 
10281   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10282   ins_encode %{
10283     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10284     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10285   %}
10286   ins_pipe(ialu_reg_reg);
10287 %}
10288 
10289 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10290 %{
10291   predicate(VM_Version::supports_fast_3op_lea());
10292   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10293 
10294   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10295   ins_encode %{
10296     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10297     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10298   %}
10299   ins_pipe(ialu_reg_reg);
10300 %}
10301 
10302 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10303 %{
10304   predicate(!UseAPX);
10305   match(Set dst (AddL dst src));
10306   effect(KILL cr);
10307   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10308 
10309   format %{ "addq    $dst, $src\t# long" %}
10310   ins_encode %{
10311     __ addq($dst$$Register, $src$$Register);
10312   %}
10313   ins_pipe(ialu_reg_reg);
10314 %}
10315 
10316 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10317 %{
10318   predicate(UseAPX);
10319   match(Set dst (AddL src1 src2));
10320   effect(KILL cr);
10321   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10322 
10323   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10324   ins_encode %{
10325     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10326   %}
10327   ins_pipe(ialu_reg_reg);
10328 %}
10329 
10330 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10331 %{
10332   predicate(!UseAPX);
10333   match(Set dst (AddL dst src));
10334   effect(KILL cr);
10335   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10336 
10337   format %{ "addq    $dst, $src\t# long" %}
10338   ins_encode %{
10339     __ addq($dst$$Register, $src$$constant);
10340   %}
10341   ins_pipe( ialu_reg );
10342 %}
10343 
10344 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10345 %{
10346   predicate(UseAPX);
10347   match(Set dst (AddL src1 src2));
10348   effect(KILL cr);
10349   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10350 
10351   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10352   ins_encode %{
10353     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10354   %}
10355   ins_pipe( ialu_reg );
10356 %}
10357 
10358 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10359 %{
10360   predicate(UseAPX);
10361   match(Set dst (AddL (LoadL src1) src2));
10362   effect(KILL cr);
10363   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10364 
10365   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10366   ins_encode %{
10367     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10368   %}
10369   ins_pipe( ialu_reg );
10370 %}
10371 
10372 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10373 %{
10374   predicate(!UseAPX);
10375   match(Set dst (AddL dst (LoadL src)));
10376   effect(KILL cr);
10377   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10378 
10379   ins_cost(150); // XXX
10380   format %{ "addq    $dst, $src\t# long" %}
10381   ins_encode %{
10382     __ addq($dst$$Register, $src$$Address);
10383   %}
10384   ins_pipe(ialu_reg_mem);
10385 %}
10386 
10387 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10388 %{
10389   predicate(UseAPX);
10390   match(Set dst (AddL src1 (LoadL src2)));
10391   effect(KILL cr);
10392   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10393 
10394   ins_cost(150);
10395   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10396   ins_encode %{
10397     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10398   %}
10399   ins_pipe(ialu_reg_mem);
10400 %}
10401 
10402 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10403 %{
10404   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10405   effect(KILL cr);
10406   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10407 
10408   ins_cost(150); // XXX
10409   format %{ "addq    $dst, $src\t# long" %}
10410   ins_encode %{
10411     __ addq($dst$$Address, $src$$Register);
10412   %}
10413   ins_pipe(ialu_mem_reg);
10414 %}
10415 
10416 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10417 %{
10418   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10419   effect(KILL cr);
10420   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10421 
10422   ins_cost(125); // XXX
10423   format %{ "addq    $dst, $src\t# long" %}
10424   ins_encode %{
10425     __ addq($dst$$Address, $src$$constant);
10426   %}
10427   ins_pipe(ialu_mem_imm);
10428 %}
10429 
10430 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10431 %{
10432   predicate(!UseAPX && UseIncDec);
10433   match(Set dst (AddL dst src));
10434   effect(KILL cr);
10435 
10436   format %{ "incq    $dst\t# long" %}
10437   ins_encode %{
10438     __ incrementq($dst$$Register);
10439   %}
10440   ins_pipe(ialu_reg);
10441 %}
10442 
10443 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10444 %{
10445   predicate(UseAPX && UseIncDec);
10446   match(Set dst (AddL src val));
10447   effect(KILL cr);
10448   flag(PD::Flag_ndd_demotable_opr1);
10449 
10450   format %{ "eincq    $dst, $src\t# long ndd" %}
10451   ins_encode %{
10452     __ eincq($dst$$Register, $src$$Register, false);
10453   %}
10454   ins_pipe(ialu_reg);
10455 %}
10456 
10457 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10458 %{
10459   predicate(UseAPX && UseIncDec);
10460   match(Set dst (AddL (LoadL src) val));
10461   effect(KILL cr);
10462 
10463   format %{ "eincq    $dst, $src\t# long ndd" %}
10464   ins_encode %{
10465     __ eincq($dst$$Register, $src$$Address, false);
10466   %}
10467   ins_pipe(ialu_reg);
10468 %}
10469 
10470 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10471 %{
10472   predicate(UseIncDec);
10473   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10474   effect(KILL cr);
10475 
10476   ins_cost(125); // XXX
10477   format %{ "incq    $dst\t# long" %}
10478   ins_encode %{
10479     __ incrementq($dst$$Address);
10480   %}
10481   ins_pipe(ialu_mem_imm);
10482 %}
10483 
10484 // XXX why does that use AddL
10485 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10486 %{
10487   predicate(!UseAPX && UseIncDec);
10488   match(Set dst (AddL dst src));
10489   effect(KILL cr);
10490 
10491   format %{ "decq    $dst\t# long" %}
10492   ins_encode %{
10493     __ decrementq($dst$$Register);
10494   %}
10495   ins_pipe(ialu_reg);
10496 %}
10497 
10498 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10499 %{
10500   predicate(UseAPX && UseIncDec);
10501   match(Set dst (AddL src val));
10502   effect(KILL cr);
10503   flag(PD::Flag_ndd_demotable_opr1);
10504 
10505   format %{ "edecq    $dst, $src\t# long ndd" %}
10506   ins_encode %{
10507     __ edecq($dst$$Register, $src$$Register, false);
10508   %}
10509   ins_pipe(ialu_reg);
10510 %}
10511 
10512 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10513 %{
10514   predicate(UseAPX && UseIncDec);
10515   match(Set dst (AddL (LoadL src) val));
10516   effect(KILL cr);
10517 
10518   format %{ "edecq    $dst, $src\t# long ndd" %}
10519   ins_encode %{
10520     __ edecq($dst$$Register, $src$$Address, false);
10521   %}
10522   ins_pipe(ialu_reg);
10523 %}
10524 
10525 // XXX why does that use AddL
10526 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10527 %{
10528   predicate(UseIncDec);
10529   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10530   effect(KILL cr);
10531 
10532   ins_cost(125); // XXX
10533   format %{ "decq    $dst\t# long" %}
10534   ins_encode %{
10535     __ decrementq($dst$$Address);
10536   %}
10537   ins_pipe(ialu_mem_imm);
10538 %}
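
// The incL/decL forms above are only selected under -XX:+UseIncDec; INC/DEC encode a
// byte shorter than ADD/SUB with a +-1 immediate but leave the carry flag untouched.
// A minimal Java sketch (illustrative only, names hypothetical) of code that can
// reach the memory forms:
//
//   class Stats {
//     long hits;
//     void hit()  { hits++; }   // StoreL(AddL(LoadL, 1))  -> incq [hits] via incL_mem
//     void miss() { hits--; }   // StoreL(AddL(LoadL, -1)) -> decq [hits] via decL_mem
//   }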
10539 
10540 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10541 %{
10542   predicate(VM_Version::supports_fast_2op_lea());
10543   match(Set dst (AddL (LShiftL index scale) disp));
10544 
10545   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10546   ins_encode %{
10547     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10548     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10549   %}
10550   ins_pipe(ialu_reg_reg);
10551 %}
10552 
10553 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10554 %{
10555   predicate(VM_Version::supports_fast_3op_lea());
10556   match(Set dst (AddL (AddL base index) disp));
10557 
10558   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10559   ins_encode %{
10560     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10561   %}
10562   ins_pipe(ialu_reg_reg);
10563 %}
10564 
10565 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10566 %{
10567   predicate(VM_Version::supports_fast_2op_lea());
10568   match(Set dst (AddL base (LShiftL index scale)));
10569 
10570   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10571   ins_encode %{
10572     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10573     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10574   %}
10575   ins_pipe(ialu_reg_reg);
10576 %}
10577 
10578 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10579 %{
10580   predicate(VM_Version::supports_fast_3op_lea());
10581   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10582 
10583   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10584   ins_encode %{
10585     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10586     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10587   %}
10588   ins_pipe(ialu_reg_reg);
10589 %}
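
// The leaL rules above fold shift-add address arithmetic into a single LEA when the
// CPU reports fast 2- or 3-operand LEA. A minimal Java sketch (illustrative only,
// helper name hypothetical) of an expression shaped like
// AddL(AddL(base, LShiftL(index, 3)), 16), which can match leaL_rReg_rReg_immI2_immL32:
//
//   static long slotAddress(long base, long index) {
//     return base + (index << 3) + 16;   // -> leaq dst, [base + index*8 + 16]
//   }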
10590 
10591 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10592 %{
10593   match(Set dst (AddP dst src));
10594   effect(KILL cr);
10595   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10596 
10597   format %{ "addq    $dst, $src\t# ptr" %}
10598   ins_encode %{
10599     __ addq($dst$$Register, $src$$Register);
10600   %}
10601   ins_pipe(ialu_reg_reg);
10602 %}
10603 
10604 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10605 %{
10606   match(Set dst (AddP dst src));
10607   effect(KILL cr);
10608   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10609 
10610   format %{ "addq    $dst, $src\t# ptr" %}
10611   ins_encode %{
10612     __ addq($dst$$Register, $src$$constant);
10613   %}
10614   ins_pipe( ialu_reg );
10615 %}
10616 
10617 // XXX addP mem ops ????
10618 
10619 instruct checkCastPP(rRegP dst)
10620 %{
10621   match(Set dst (CheckCastPP dst));
10622 
10623   size(0);
10624   format %{ "# checkcastPP of $dst" %}
10625   ins_encode(/* empty encoding */);
10626   ins_pipe(empty);
10627 %}
10628 
10629 instruct castPP(rRegP dst)
10630 %{
10631   match(Set dst (CastPP dst));
10632 
10633   size(0);
10634   format %{ "# castPP of $dst" %}
10635   ins_encode(/* empty encoding */);
10636   ins_pipe(empty);
10637 %}
10638 
10639 instruct castII(rRegI dst)
10640 %{
10641   predicate(VerifyConstraintCasts == 0);
10642   match(Set dst (CastII dst));
10643 
10644   size(0);
10645   format %{ "# castII of $dst" %}
10646   ins_encode(/* empty encoding */);
10647   ins_cost(0);
10648   ins_pipe(empty);
10649 %}
10650 
10651 instruct castII_checked(rRegI dst, rFlagsReg cr)
10652 %{
10653   predicate(VerifyConstraintCasts > 0);
10654   match(Set dst (CastII dst));
10655 
10656   effect(KILL cr);
10657   format %{ "# cast_checked_II $dst" %}
10658   ins_encode %{
10659     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10660   %}
10661   ins_pipe(pipe_slow);
10662 %}
10663 
10664 instruct castLL(rRegL dst)
10665 %{
10666   predicate(VerifyConstraintCasts == 0);
10667   match(Set dst (CastLL dst));
10668 
10669   size(0);
10670   format %{ "# castLL of $dst" %}
10671   ins_encode(/* empty encoding */);
10672   ins_cost(0);
10673   ins_pipe(empty);
10674 %}
10675 
10676 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10677 %{
10678   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10679   match(Set dst (CastLL dst));
10680 
10681   effect(KILL cr);
10682   format %{ "# cast_checked_LL $dst" %}
10683   ins_encode %{
10684     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10685   %}
10686   ins_pipe(pipe_slow);
10687 %}
10688 
10689 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10690 %{
10691   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10692   match(Set dst (CastLL dst));
10693 
10694   effect(KILL cr, TEMP tmp);
10695   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10696   ins_encode %{
10697     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10698   %}
10699   ins_pipe(pipe_slow);
10700 %}
10701 
10702 instruct castFF(regF dst)
10703 %{
10704   match(Set dst (CastFF dst));
10705 
10706   size(0);
10707   format %{ "# castFF of $dst" %}
10708   ins_encode(/* empty encoding */);
10709   ins_cost(0);
10710   ins_pipe(empty);
10711 %}
10712 
10713 instruct castHH(regF dst)
10714 %{
10715   match(Set dst (CastHH dst));
10716 
10717   size(0);
10718   format %{ "# castHH of $dst" %}
10719   ins_encode(/* empty encoding */);
10720   ins_cost(0);
10721   ins_pipe(empty);
10722 %}
10723 
10724 instruct castDD(regD dst)
10725 %{
10726   match(Set dst (CastDD dst));
10727 
10728   size(0);
10729   format %{ "# castDD of $dst" %}
10730   ins_encode(/* empty encoding */);
10731   ins_cost(0);
10732   ins_pipe(empty);
10733 %}
10734 
10735 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
10736 instruct compareAndSwapP(rRegI res,
10737                          memory mem_ptr,
10738                          rax_RegP oldval, rRegP newval,
10739                          rFlagsReg cr)
10740 %{
10741   predicate(n->as_LoadStore()->barrier_data() == 0);
10742   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10743   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10744   effect(KILL cr, KILL oldval);
10745 
10746   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10747             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10748             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10749   ins_encode %{
10750     __ lock();
10751     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10752     __ setcc(Assembler::equal, $res$$Register);
10753   %}
10754   ins_pipe( pipe_cmpxchg );
10755 %}
10756 
10757 instruct compareAndSwapL(rRegI res,
10758                          memory mem_ptr,
10759                          rax_RegL oldval, rRegL newval,
10760                          rFlagsReg cr)
10761 %{
10762   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10763   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10764   effect(KILL cr, KILL oldval);
10765 
10766   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10767             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10768             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10769   ins_encode %{
10770     __ lock();
10771     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10772     __ setcc(Assembler::equal, $res$$Register);
10773   %}
10774   ins_pipe( pipe_cmpxchg );
10775 %}
10776 
10777 instruct compareAndSwapI(rRegI res,
10778                          memory mem_ptr,
10779                          rax_RegI oldval, rRegI newval,
10780                          rFlagsReg cr)
10781 %{
10782   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10783   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10784   effect(KILL cr, KILL oldval);
10785 
10786   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10787             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10788             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10789   ins_encode %{
10790     __ lock();
10791     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10792     __ setcc(Assembler::equal, $res$$Register);
10793   %}
10794   ins_pipe( pipe_cmpxchg );
10795 %}
10796 
10797 instruct compareAndSwapB(rRegI res,
10798                          memory mem_ptr,
10799                          rax_RegI oldval, rRegI newval,
10800                          rFlagsReg cr)
10801 %{
10802   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10803   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10804   effect(KILL cr, KILL oldval);
10805 
10806   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10807             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10808             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10809   ins_encode %{
10810     __ lock();
10811     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10812     __ setcc(Assembler::equal, $res$$Register);
10813   %}
10814   ins_pipe( pipe_cmpxchg );
10815 %}
10816 
10817 instruct compareAndSwapS(rRegI res,
10818                          memory mem_ptr,
10819                          rax_RegI oldval, rRegI newval,
10820                          rFlagsReg cr)
10821 %{
10822   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10823   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10824   effect(KILL cr, KILL oldval);
10825 
10826   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10827             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10828             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10829   ins_encode %{
10830     __ lock();
10831     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10832     __ setcc(Assembler::equal, $res$$Register);
10833   %}
10834   ins_pipe( pipe_cmpxchg );
10835 %}
10836 
10837 instruct compareAndSwapN(rRegI res,
10838                           memory mem_ptr,
10839                           rax_RegN oldval, rRegN newval,
10840                           rFlagsReg cr) %{
10841   predicate(n->as_LoadStore()->barrier_data() == 0);
10842   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10843   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10844   effect(KILL cr, KILL oldval);
10845 
10846   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10847             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10848             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10849   ins_encode %{
10850     __ lock();
10851     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10852     __ setcc(Assembler::equal, $res$$Register);
10853   %}
10854   ins_pipe( pipe_cmpxchg );
10855 %}
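
// The compareAndSwap{B,S,I,L,N,P} rules above produce a boolean result, so the locked
// cmpxchg is followed by setcc to materialize 0/1 from ZF. A minimal Java sketch
// (illustrative only) of code that typically lowers to compareAndSwapL:
//
//   import java.util.concurrent.atomic.AtomicLong;
//
//   static boolean tryPublish(AtomicLong seq, long expected, long next) {
//     return seq.compareAndSet(expected, next);   // -> lock cmpxchgq + setcc
//   }
//
// The weak variants (WeakCompareAndSwap*) share these rules because x86 cmpxchg cannot
// fail spuriously, so weak and strong CAS emit identical code here.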
10856 
10857 instruct compareAndExchangeB(
10858                          memory mem_ptr,
10859                          rax_RegI oldval, rRegI newval,
10860                          rFlagsReg cr)
10861 %{
10862   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10863   effect(KILL cr);
10864 
10865   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10866             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10867   ins_encode %{
10868     __ lock();
10869     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10870   %}
10871   ins_pipe( pipe_cmpxchg );
10872 %}
10873 
10874 instruct compareAndExchangeS(
10875                          memory mem_ptr,
10876                          rax_RegI oldval, rRegI newval,
10877                          rFlagsReg cr)
10878 %{
10879   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10880   effect(KILL cr);
10881 
10882   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10883             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10884   ins_encode %{
10885     __ lock();
10886     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10887   %}
10888   ins_pipe( pipe_cmpxchg );
10889 %}
10890 
10891 instruct compareAndExchangeI(
10892                          memory mem_ptr,
10893                          rax_RegI oldval, rRegI newval,
10894                          rFlagsReg cr)
10895 %{
10896   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10897   effect(KILL cr);
10898 
10899   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10900             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10901   ins_encode %{
10902     __ lock();
10903     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10904   %}
10905   ins_pipe( pipe_cmpxchg );
10906 %}
10907 
10908 instruct compareAndExchangeL(
10909                          memory mem_ptr,
10910                          rax_RegL oldval, rRegL newval,
10911                          rFlagsReg cr)
10912 %{
10913   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10914   effect(KILL cr);
10915 
10916   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10917             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10918   ins_encode %{
10919     __ lock();
10920     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10921   %}
10922   ins_pipe( pipe_cmpxchg );
10923 %}
10924 
10925 instruct compareAndExchangeN(
10926                           memory mem_ptr,
10927                           rax_RegN oldval, rRegN newval,
10928                           rFlagsReg cr) %{
10929   predicate(n->as_LoadStore()->barrier_data() == 0);
10930   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10931   effect(KILL cr);
10932 
10933   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10934             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10935   ins_encode %{
10936     __ lock();
10937     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10938   %}
10939   ins_pipe( pipe_cmpxchg );
10940 %}
10941 
10942 instruct compareAndExchangeP(
10943                          memory mem_ptr,
10944                          rax_RegP oldval, rRegP newval,
10945                          rFlagsReg cr)
10946 %{
10947   predicate(n->as_LoadStore()->barrier_data() == 0);
10948   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10949   effect(KILL cr);
10950 
10951   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10952             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10953   ins_encode %{
10954     __ lock();
10955     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10956   %}
10957   ins_pipe( pipe_cmpxchg );
10958 %}
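
// The compareAndExchange* rules return the witness value rather than a boolean: cmpxchg
// leaves the current memory contents in rax on failure (and the expected value, which
// equals the prior contents, on success), so the rax-constrained oldval operand is the
// result and no setcc/movzbl is needed. A minimal Java sketch (illustrative only):
//
//   import java.util.concurrent.atomic.AtomicInteger;
//
//   static int claim(AtomicInteger state, int expected, int next) {
//     return state.compareAndExchange(expected, next);   // -> lock cmpxchgl, witness in rax
//   }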
10959 
10960 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10961   predicate(n->as_LoadStore()->result_not_used());
10962   match(Set dummy (GetAndAddB mem add));
10963   effect(KILL cr);
10964   format %{ "addb_lock   $mem, $add" %}
10965   ins_encode %{
10966     __ lock();
10967     __ addb($mem$$Address, $add$$Register);
10968   %}
10969   ins_pipe(pipe_cmpxchg);
10970 %}
10971 
10972 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10973   predicate(n->as_LoadStore()->result_not_used());
10974   match(Set dummy (GetAndAddB mem add));
10975   effect(KILL cr);
10976   format %{ "addb_lock   $mem, $add" %}
10977   ins_encode %{
10978     __ lock();
10979     __ addb($mem$$Address, $add$$constant);
10980   %}
10981   ins_pipe(pipe_cmpxchg);
10982 %}
10983 
10984 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10985   predicate(!n->as_LoadStore()->result_not_used());
10986   match(Set newval (GetAndAddB mem newval));
10987   effect(KILL cr);
10988   format %{ "xaddb_lock  $mem, $newval" %}
10989   ins_encode %{
10990     __ lock();
10991     __ xaddb($mem$$Address, $newval$$Register);
10992   %}
10993   ins_pipe(pipe_cmpxchg);
10994 %}
10995 
10996 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10997   predicate(n->as_LoadStore()->result_not_used());
10998   match(Set dummy (GetAndAddS mem add));
10999   effect(KILL cr);
11000   format %{ "addw_lock   $mem, $add" %}
11001   ins_encode %{
11002     __ lock();
11003     __ addw($mem$$Address, $add$$Register);
11004   %}
11005   ins_pipe(pipe_cmpxchg);
11006 %}
11007 
11008 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11009   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
11010   match(Set dummy (GetAndAddS mem add));
11011   effect(KILL cr);
11012   format %{ "addw_lock   $mem, $add" %}
11013   ins_encode %{
11014     __ lock();
11015     __ addw($mem$$Address, $add$$constant);
11016   %}
11017   ins_pipe(pipe_cmpxchg);
11018 %}
11019 
11020 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
11021   predicate(!n->as_LoadStore()->result_not_used());
11022   match(Set newval (GetAndAddS mem newval));
11023   effect(KILL cr);
11024   format %{ "xaddw_lock  $mem, $newval" %}
11025   ins_encode %{
11026     __ lock();
11027     __ xaddw($mem$$Address, $newval$$Register);
11028   %}
11029   ins_pipe(pipe_cmpxchg);
11030 %}
11031 
11032 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
11033   predicate(n->as_LoadStore()->result_not_used());
11034   match(Set dummy (GetAndAddI mem add));
11035   effect(KILL cr);
11036   format %{ "addl_lock   $mem, $add" %}
11037   ins_encode %{
11038     __ lock();
11039     __ addl($mem$$Address, $add$$Register);
11040   %}
11041   ins_pipe(pipe_cmpxchg);
11042 %}
11043 
11044 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11045   predicate(n->as_LoadStore()->result_not_used());
11046   match(Set dummy (GetAndAddI mem add));
11047   effect(KILL cr);
11048   format %{ "addl_lock   $mem, $add" %}
11049   ins_encode %{
11050     __ lock();
11051     __ addl($mem$$Address, $add$$constant);
11052   %}
11053   ins_pipe(pipe_cmpxchg);
11054 %}
11055 
11056 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
11057   predicate(!n->as_LoadStore()->result_not_used());
11058   match(Set newval (GetAndAddI mem newval));
11059   effect(KILL cr);
11060   format %{ "xaddl_lock  $mem, $newval" %}
11061   ins_encode %{
11062     __ lock();
11063     __ xaddl($mem$$Address, $newval$$Register);
11064   %}
11065   ins_pipe(pipe_cmpxchg);
11066 %}
11067 
11068 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11069   predicate(n->as_LoadStore()->result_not_used());
11070   match(Set dummy (GetAndAddL mem add));
11071   effect(KILL cr);
11072   format %{ "addq_lock   $mem, $add" %}
11073   ins_encode %{
11074     __ lock();
11075     __ addq($mem$$Address, $add$$Register);
11076   %}
11077   ins_pipe(pipe_cmpxchg);
11078 %}
11079 
11080 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11081   predicate(n->as_LoadStore()->result_not_used());
11082   match(Set dummy (GetAndAddL mem add));
11083   effect(KILL cr);
11084   format %{ "addq_lock   $mem, $add" %}
11085   ins_encode %{
11086     __ lock();
11087     __ addq($mem$$Address, $add$$constant);
11088   %}
11089   ins_pipe(pipe_cmpxchg);
11090 %}
11091 
11092 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11093   predicate(!n->as_LoadStore()->result_not_used());
11094   match(Set newval (GetAndAddL mem newval));
11095   effect(KILL cr);
11096   format %{ "xaddq_lock  $mem, $newval" %}
11097   ins_encode %{
11098     __ lock();
11099     __ xaddq($mem$$Address, $newval$$Register);
11100   %}
11101   ins_pipe(pipe_cmpxchg);
11102 %}
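
// For GetAndAdd*, the result_not_used() predicate selects a plain locked add when the
// old value is discarded, avoiding xadd's register write-back. A minimal Java sketch
// (illustrative only):
//
//   import java.util.concurrent.atomic.AtomicLong;
//
//   static void count(AtomicLong c)  { c.getAndAdd(1); }          // result unused -> lock addq [c], 1
//   static long ticket(AtomicLong c) { return c.getAndAdd(1); }   // result used   -> lock xaddq [c], reg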
11103 
11104 instruct xchgB( memory mem, rRegI newval) %{
11105   match(Set newval (GetAndSetB mem newval));
11106   format %{ "XCHGB  $newval,[$mem]" %}
11107   ins_encode %{
11108     __ xchgb($newval$$Register, $mem$$Address);
11109   %}
11110   ins_pipe( pipe_cmpxchg );
11111 %}
11112 
11113 instruct xchgS( memory mem, rRegI newval) %{
11114   match(Set newval (GetAndSetS mem newval));
11115   format %{ "XCHGW  $newval,[$mem]" %}
11116   ins_encode %{
11117     __ xchgw($newval$$Register, $mem$$Address);
11118   %}
11119   ins_pipe( pipe_cmpxchg );
11120 %}
11121 
11122 instruct xchgI( memory mem, rRegI newval) %{
11123   match(Set newval (GetAndSetI mem newval));
11124   format %{ "XCHGL  $newval,[$mem]" %}
11125   ins_encode %{
11126     __ xchgl($newval$$Register, $mem$$Address);
11127   %}
11128   ins_pipe( pipe_cmpxchg );
11129 %}
11130 
11131 instruct xchgL( memory mem, rRegL newval) %{
11132   match(Set newval (GetAndSetL mem newval));
11133   format %{ "XCHGL  $newval,[$mem]" %}
11134   ins_encode %{
11135     __ xchgq($newval$$Register, $mem$$Address);
11136   %}
11137   ins_pipe( pipe_cmpxchg );
11138 %}
11139 
11140 instruct xchgP( memory mem, rRegP newval) %{
11141   match(Set newval (GetAndSetP mem newval));
11142   predicate(n->as_LoadStore()->barrier_data() == 0);
11143   format %{ "XCHGQ  $newval,[$mem]" %}
11144   ins_encode %{
11145     __ xchgq($newval$$Register, $mem$$Address);
11146   %}
11147   ins_pipe( pipe_cmpxchg );
11148 %}
11149 
11150 instruct xchgN( memory mem, rRegN newval) %{
11151   predicate(n->as_LoadStore()->barrier_data() == 0);
11152   match(Set newval (GetAndSetN mem newval));
11153   format %{ "XCHGL  $newval,$mem]" %}
11154   ins_encode %{
11155     __ xchgl($newval$$Register, $mem$$Address);
11156   %}
11157   ins_pipe( pipe_cmpxchg );
11158 %}
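
// The xchg* rules have no KILL cr effect and emit no lock() call because XCHG with a
// memory operand asserts the bus lock implicitly and leaves the flags untouched.
// A minimal Java sketch (illustrative only) of code that typically lowers to xchgP/xchgN:
//
//   import java.util.concurrent.atomic.AtomicReference;
//
//   static Object swapOwner(AtomicReference<Object> owner, Object next) {
//     return owner.getAndSet(next);   // -> xchgq (or xchgl with compressed oops)
//   }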
11159 
11160 //----------Abs Instructions-------------------------------------------
11161 
11162 // Integer Absolute Instructions
11163 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11164 %{
11165   match(Set dst (AbsI src));
11166   effect(TEMP dst, KILL cr);
11167   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11168             "subl    $dst, $src\n\t"
11169             "cmovll  $dst, $src" %}
11170   ins_encode %{
11171     __ xorl($dst$$Register, $dst$$Register);
11172     __ subl($dst$$Register, $src$$Register);
11173     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11174   %}
11175 
11176   ins_pipe(ialu_reg_reg);
11177 %}
11178 
11179 // Long Absolute Instructions
11180 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11181 %{
11182   match(Set dst (AbsL src));
11183   effect(TEMP dst, KILL cr);
11184   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11185             "subq    $dst, $src\n\t"
11186             "cmovlq  $dst, $src" %}
11187   ins_encode %{
11188     __ xorl($dst$$Register, $dst$$Register);
11189     __ subq($dst$$Register, $src$$Register);
11190     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11191   %}
11192 
11193   ins_pipe(ialu_reg_reg);
11194 %}
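
// The Abs rules emit a branch-free sequence: zero the destination, subtract the source
// (so dst = -src with the flags reflecting that subtraction), then conditionally move
// the original source over it so the non-negative of the two is kept; as with
// Math.abs, Integer.MIN_VALUE maps to itself. A minimal Java sketch (illustrative
// only) of the equivalent scalar logic:
//
//   static int absSketch(int x) {
//     int neg = 0 - x;               // xorl dst,dst ; subl dst,src
//     return (neg < 0) ? x : neg;    // cmovll dst,src
//   }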
11195 
11196 //----------Subtraction Instructions-------------------------------------------
11197 
11198 // Integer Subtraction Instructions
11199 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11200 %{
11201   predicate(!UseAPX);
11202   match(Set dst (SubI dst src));
11203   effect(KILL cr);
11204   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11205 
11206   format %{ "subl    $dst, $src\t# int" %}
11207   ins_encode %{
11208     __ subl($dst$$Register, $src$$Register);
11209   %}
11210   ins_pipe(ialu_reg_reg);
11211 %}
11212 
11213 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11214 %{
11215   predicate(UseAPX);
11216   match(Set dst (SubI src1 src2));
11217   effect(KILL cr);
11218   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11219 
11220   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11221   ins_encode %{
11222     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11223   %}
11224   ins_pipe(ialu_reg_reg);
11225 %}
11226 
11227 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11228 %{
11229   predicate(UseAPX);
11230   match(Set dst (SubI src1 src2));
11231   effect(KILL cr);
11232   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11233 
11234   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11235   ins_encode %{
11236     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11237   %}
11238   ins_pipe(ialu_reg_reg);
11239 %}
11240 
11241 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11242 %{
11243   predicate(UseAPX);
11244   match(Set dst (SubI (LoadI src1) src2));
11245   effect(KILL cr);
11246   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11247 
11248   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11249   ins_encode %{
11250     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11251   %}
11252   ins_pipe(ialu_reg_reg);
11253 %}
11254 
11255 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11256 %{
11257   predicate(!UseAPX);
11258   match(Set dst (SubI dst (LoadI src)));
11259   effect(KILL cr);
11260   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11261 
11262   ins_cost(150);
11263   format %{ "subl    $dst, $src\t# int" %}
11264   ins_encode %{
11265     __ subl($dst$$Register, $src$$Address);
11266   %}
11267   ins_pipe(ialu_reg_mem);
11268 %}
11269 
11270 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11271 %{
11272   predicate(UseAPX);
11273   match(Set dst (SubI src1 (LoadI src2)));
11274   effect(KILL cr);
11275   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11276 
11277   ins_cost(150);
11278   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11279   ins_encode %{
11280     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11281   %}
11282   ins_pipe(ialu_reg_mem);
11283 %}
11284 
11285 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11286 %{
11287   predicate(UseAPX);
11288   match(Set dst (SubI (LoadI src1) src2));
11289   effect(KILL cr);
11290   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11291 
11292   ins_cost(150);
11293   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11294   ins_encode %{
11295     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11296   %}
11297   ins_pipe(ialu_reg_mem);
11298 %}
11299 
11300 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11301 %{
11302   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11303   effect(KILL cr);
11304   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11305 
11306   ins_cost(150);
11307   format %{ "subl    $dst, $src\t# int" %}
11308   ins_encode %{
11309     __ subl($dst$$Address, $src$$Register);
11310   %}
11311   ins_pipe(ialu_mem_reg);
11312 %}
11313 
11314 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11315 %{
11316   predicate(!UseAPX);
11317   match(Set dst (SubL dst src));
11318   effect(KILL cr);
11319   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11320 
11321   format %{ "subq    $dst, $src\t# long" %}
11322   ins_encode %{
11323     __ subq($dst$$Register, $src$$Register);
11324   %}
11325   ins_pipe(ialu_reg_reg);
11326 %}
11327 
11328 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11329 %{
11330   predicate(UseAPX);
11331   match(Set dst (SubL src1 src2));
11332   effect(KILL cr);
11333   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11334 
11335   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11336   ins_encode %{
11337     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11338   %}
11339   ins_pipe(ialu_reg_reg);
11340 %}
11341 
11342 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11343 %{
11344   predicate(UseAPX);
11345   match(Set dst (SubL src1 src2));
11346   effect(KILL cr);
11347   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11348 
11349   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11350   ins_encode %{
11351     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11352   %}
11353   ins_pipe(ialu_reg_reg);
11354 %}
11355 
11356 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11357 %{
11358   predicate(UseAPX);
11359   match(Set dst (SubL (LoadL src1) src2));
11360   effect(KILL cr);
11361   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11362 
11363   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11364   ins_encode %{
11365     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11366   %}
11367   ins_pipe(ialu_reg_reg);
11368 %}
11369 
11370 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11371 %{
11372   predicate(!UseAPX);
11373   match(Set dst (SubL dst (LoadL src)));
11374   effect(KILL cr);
11375   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11376 
11377   ins_cost(150);
11378   format %{ "subq    $dst, $src\t# long" %}
11379   ins_encode %{
11380     __ subq($dst$$Register, $src$$Address);
11381   %}
11382   ins_pipe(ialu_reg_mem);
11383 %}
11384 
11385 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11386 %{
11387   predicate(UseAPX);
11388   match(Set dst (SubL src1 (LoadL src2)));
11389   effect(KILL cr);
11390   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11391 
11392   ins_cost(150);
11393   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11394   ins_encode %{
11395     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11396   %}
11397   ins_pipe(ialu_reg_mem);
11398 %}
11399 
11400 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11401 %{
11402   predicate(UseAPX);
11403   match(Set dst (SubL (LoadL src1) src2));
11404   effect(KILL cr);
11405   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11406 
11407   ins_cost(150);
11408   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11409   ins_encode %{
11410     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11411   %}
11412   ins_pipe(ialu_reg_mem);
11413 %}
11414 
11415 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11416 %{
11417   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11418   effect(KILL cr);
11419   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11420 
11421   ins_cost(150);
11422   format %{ "subq    $dst, $src\t# long" %}
11423   ins_encode %{
11424     __ subq($dst$$Address, $src$$Register);
11425   %}
11426   ins_pipe(ialu_mem_reg);
11427 %}
11428 
11429 // Subtract from a pointer
11430 // XXX hmpf???
11431 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11432 %{
11433   match(Set dst (AddP dst (SubI zero src)));
11434   effect(KILL cr);
11435 
11436   format %{ "subq    $dst, $src\t# ptr - int" %}
11437   ins_encode %{
11438     __ subq($dst$$Register, $src$$Register);
11439   %}
11440   ins_pipe(ialu_reg_reg);
11441 %}
11442 
11443 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11444 %{
11445   predicate(!UseAPX);
11446   match(Set dst (SubI zero dst));
11447   effect(KILL cr);
11448   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11449 
11450   format %{ "negl    $dst\t# int" %}
11451   ins_encode %{
11452     __ negl($dst$$Register);
11453   %}
11454   ins_pipe(ialu_reg);
11455 %}
11456 
11457 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11458 %{
11459   predicate(UseAPX);
11460   match(Set dst (SubI zero src));
11461   effect(KILL cr);
11462   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11463 
11464   format %{ "enegl    $dst, $src\t# int ndd" %}
11465   ins_encode %{
11466     __ enegl($dst$$Register, $src$$Register, false);
11467   %}
11468   ins_pipe(ialu_reg);
11469 %}
11470 
11471 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11472 %{
11473   predicate(!UseAPX);
11474   match(Set dst (NegI dst));
11475   effect(KILL cr);
11476   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11477 
11478   format %{ "negl    $dst\t# int" %}
11479   ins_encode %{
11480     __ negl($dst$$Register);
11481   %}
11482   ins_pipe(ialu_reg);
11483 %}
11484 
11485 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11486 %{
11487   predicate(UseAPX);
11488   match(Set dst (NegI src));
11489   effect(KILL cr);
11490   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11491 
11492   format %{ "enegl    $dst, $src\t# int ndd" %}
11493   ins_encode %{
11494     __ enegl($dst$$Register, $src$$Register, false);
11495   %}
11496   ins_pipe(ialu_reg);
11497 %}
11498 
11499 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11500 %{
11501   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11502   effect(KILL cr);
11503   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11504 
11505   format %{ "negl    $dst\t# int" %}
11506   ins_encode %{
11507     __ negl($dst$$Address);
11508   %}
11509   ins_pipe(ialu_reg);
11510 %}
11511 
11512 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11513 %{
11514   predicate(!UseAPX);
11515   match(Set dst (SubL zero dst));
11516   effect(KILL cr);
11517   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11518 
11519   format %{ "negq    $dst\t# long" %}
11520   ins_encode %{
11521     __ negq($dst$$Register);
11522   %}
11523   ins_pipe(ialu_reg);
11524 %}
11525 
11526 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11527 %{
11528   predicate(UseAPX);
11529   match(Set dst (SubL zero src));
11530   effect(KILL cr);
11531   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11532 
11533   format %{ "enegq    $dst, $src\t# long ndd" %}
11534   ins_encode %{
11535     __ enegq($dst$$Register, $src$$Register, false);
11536   %}
11537   ins_pipe(ialu_reg);
11538 %}
11539 
11540 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11541 %{
11542   predicate(!UseAPX);
11543   match(Set dst (NegL dst));
11544   effect(KILL cr);
11545   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11546 
11547   format %{ "negq    $dst\t# int" %}
11548   ins_encode %{
11549     __ negq($dst$$Register);
11550   %}
11551   ins_pipe(ialu_reg);
11552 %}
11553 
11554 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11555 %{
11556   predicate(UseAPX);
11557   match(Set dst (NegL src));
11558   effect(KILL cr);
11559   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11560 
11561   format %{ "enegq    $dst, $src\t# long ndd" %}
11562   ins_encode %{
11563     __ enegq($dst$$Register, $src$$Register, false);
11564   %}
11565   ins_pipe(ialu_reg);
11566 %}
11567 
11568 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11569 %{
11570   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11571   effect(KILL cr);
11572   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11573 
11574   format %{ "negq    $dst\t# long" %}
11575   ins_encode %{
11576     __ negq($dst$$Address);
11577   %}
11578   ins_pipe(ialu_reg);
11579 %}
11580 
11581 //----------Multiplication/Division Instructions-------------------------------
11582 // Integer Multiplication Instructions
11583 // Multiply Register
11584 
11585 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11586 %{
11587   predicate(!UseAPX);
11588   match(Set dst (MulI dst src));
11589   effect(KILL cr);
11590 
11591   ins_cost(300);
11592   format %{ "imull   $dst, $src\t# int" %}
11593   ins_encode %{
11594     __ imull($dst$$Register, $src$$Register);
11595   %}
11596   ins_pipe(ialu_reg_reg_alu0);
11597 %}
11598 
11599 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11600 %{
11601   predicate(UseAPX);
11602   match(Set dst (MulI src1 src2));
11603   effect(KILL cr);
11604   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11605 
11606   ins_cost(300);
11607   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11608   ins_encode %{
11609     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11610   %}
11611   ins_pipe(ialu_reg_reg_alu0);
11612 %}
11613 
11614 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11615 %{
11616   match(Set dst (MulI src imm));
11617   effect(KILL cr);
11618 
11619   ins_cost(300);
11620   format %{ "imull   $dst, $src, $imm\t# int" %}
11621   ins_encode %{
11622     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11623   %}
11624   ins_pipe(ialu_reg_reg_alu0);
11625 %}
11626 
11627 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11628 %{
11629   predicate(!UseAPX);
11630   match(Set dst (MulI dst (LoadI src)));
11631   effect(KILL cr);
11632 
11633   ins_cost(350);
11634   format %{ "imull   $dst, $src\t# int" %}
11635   ins_encode %{
11636     __ imull($dst$$Register, $src$$Address);
11637   %}
11638   ins_pipe(ialu_reg_mem_alu0);
11639 %}
11640 
11641 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11642 %{
11643   predicate(UseAPX);
11644   match(Set dst (MulI src1 (LoadI src2)));
11645   effect(KILL cr);
11646   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11647 
11648   ins_cost(350);
11649   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11650   ins_encode %{
11651     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11652   %}
11653   ins_pipe(ialu_reg_mem_alu0);
11654 %}
11655 
11656 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11657 %{
11658   match(Set dst (MulI (LoadI src) imm));
11659   effect(KILL cr);
11660 
11661   ins_cost(300);
11662   format %{ "imull   $dst, $src, $imm\t# int" %}
11663   ins_encode %{
11664     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11665   %}
11666   ins_pipe(ialu_reg_mem_alu0);
11667 %}
11668 
11669 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11670 %{
11671   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11672   effect(KILL cr, KILL src2);
11673 
11674   expand %{ mulI_rReg(dst, src1, cr);
11675            mulI_rReg(src2, src3, cr);
11676            addI_rReg(dst, src2, cr); %}
11677 %}
11678 
11679 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11680 %{
11681   predicate(!UseAPX);
11682   match(Set dst (MulL dst src));
11683   effect(KILL cr);
11684 
11685   ins_cost(300);
11686   format %{ "imulq   $dst, $src\t# long" %}
11687   ins_encode %{
11688     __ imulq($dst$$Register, $src$$Register);
11689   %}
11690   ins_pipe(ialu_reg_reg_alu0);
11691 %}
11692 
11693 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11694 %{
11695   predicate(UseAPX);
11696   match(Set dst (MulL src1 src2));
11697   effect(KILL cr);
11698   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11699 
11700   ins_cost(300);
11701   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11702   ins_encode %{
11703     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11704   %}
11705   ins_pipe(ialu_reg_reg_alu0);
11706 %}
11707 
11708 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11709 %{
11710   match(Set dst (MulL src imm));
11711   effect(KILL cr);
11712 
11713   ins_cost(300);
11714   format %{ "imulq   $dst, $src, $imm\t# long" %}
11715   ins_encode %{
11716     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11717   %}
11718   ins_pipe(ialu_reg_reg_alu0);
11719 %}
11720 
11721 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11722 %{
11723   predicate(!UseAPX);
11724   match(Set dst (MulL dst (LoadL src)));
11725   effect(KILL cr);
11726 
11727   ins_cost(350);
11728   format %{ "imulq   $dst, $src\t# long" %}
11729   ins_encode %{
11730     __ imulq($dst$$Register, $src$$Address);
11731   %}
11732   ins_pipe(ialu_reg_mem_alu0);
11733 %}
11734 
11735 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11736 %{
11737   predicate(UseAPX);
11738   match(Set dst (MulL src1 (LoadL src2)));
11739   effect(KILL cr);
11740   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11741 
11742   ins_cost(350);
11743   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11744   ins_encode %{
11745     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11746   %}
11747   ins_pipe(ialu_reg_mem_alu0);
11748 %}
11749 
11750 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11751 %{
11752   match(Set dst (MulL (LoadL src) imm));
11753   effect(KILL cr);
11754 
11755   ins_cost(300);
11756   format %{ "imulq   $dst, $src, $imm\t# long" %}
11757   ins_encode %{
11758     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11759   %}
11760   ins_pipe(ialu_reg_mem_alu0);
11761 %}
11762 
11763 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11764 %{
11765   match(Set dst (MulHiL src rax));
11766   effect(USE_KILL rax, KILL cr);
11767 
11768   ins_cost(300);
11769   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11770   ins_encode %{
11771     __ imulq($src$$Register);
11772   %}
11773   ins_pipe(ialu_reg_reg_alu0);
11774 %}
11775 
11776 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11777 %{
11778   match(Set dst (UMulHiL src rax));
11779   effect(USE_KILL rax, KILL cr);
11780 
11781   ins_cost(300);
11782   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11783   ins_encode %{
11784     __ mulq($src$$Register);
11785   %}
11786   ins_pipe(ialu_reg_reg_alu0);
11787 %}
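
// MulHiL / UMulHiL consume the high half of the widening 64x64->128-bit multiply: the
// one-operand imulq/mulq forms leave the low half in rax and the high half in rdx, and
// the rules above take rdx as the result. A minimal Java sketch (illustrative only):
//
//   static long hi(long a, long b)  { return Math.multiplyHigh(a, b); }          // -> imulq, result in rdx
//   static long uhi(long a, long b) { return Math.unsignedMultiplyHigh(a, b); }  // -> mulq,  result in rdx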
11788 
11789 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11790                    rFlagsReg cr)
11791 %{
11792   match(Set rax (DivI rax div));
11793   effect(KILL rdx, KILL cr);
11794 
11795   ins_cost(30*100+10*100); // XXX
11796   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11797             "jne,s   normal\n\t"
11798             "xorl    rdx, rdx\n\t"
11799             "cmpl    $div, -1\n\t"
11800             "je,s    done\n"
11801     "normal: cdql\n\t"
11802             "idivl   $div\n"
11803     "done:"        %}
11804   ins_encode(cdql_enc(div));
11805   ins_pipe(ialu_reg_reg_alu0);
11806 %}
11807 
11808 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11809                    rFlagsReg cr)
11810 %{
11811   match(Set rax (DivL rax div));
11812   effect(KILL rdx, KILL cr);
11813 
11814   ins_cost(30*100+10*100); // XXX
11815   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11816             "cmpq    rax, rdx\n\t"
11817             "jne,s   normal\n\t"
11818             "xorl    rdx, rdx\n\t"
11819             "cmpq    $div, -1\n\t"
11820             "je,s    done\n"
11821     "normal: cdqq\n\t"
11822             "idivq   $div\n"
11823     "done:"        %}
11824   ins_encode(cdqq_enc(div));
11825   ins_pipe(ialu_reg_reg_alu0);
11826 %}
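
// The expanded div sequences above guard the one overflowing case before issuing idiv:
// dividing the most negative value by -1 would fault in hardware (#DE), while Java
// defines the result as the dividend itself with remainder 0. A minimal Java sketch
// (illustrative only) of the case the cmp/jne prologue exists for:
//
//   static int edge() {
//     return Integer.MIN_VALUE / -1;   // must yield Integer.MIN_VALUE, not trap
//   }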
11827 
11828 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11829 %{
11830   match(Set rax (UDivI rax div));
11831   effect(KILL rdx, KILL cr);
11832 
11833   ins_cost(300);
11834   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11835   ins_encode %{
11836     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11837   %}
11838   ins_pipe(ialu_reg_reg_alu0);
11839 %}
11840 
11841 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11842 %{
11843   match(Set rax (UDivL rax div));
11844   effect(KILL rdx, KILL cr);
11845 
11846   ins_cost(300);
11847   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11848   ins_encode %{
11849      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11850   %}
11851   ins_pipe(ialu_reg_reg_alu0);
11852 %}
11853 
11854 // Integer DIVMOD with Register, both quotient and mod results
11855 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11856                              rFlagsReg cr)
11857 %{
11858   match(DivModI rax div);
11859   effect(KILL cr);
11860 
11861   ins_cost(30*100+10*100); // XXX
11862   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11863             "jne,s   normal\n\t"
11864             "xorl    rdx, rdx\n\t"
11865             "cmpl    $div, -1\n\t"
11866             "je,s    done\n"
11867     "normal: cdql\n\t"
11868             "idivl   $div\n"
11869     "done:"        %}
11870   ins_encode(cdql_enc(div));
11871   ins_pipe(pipe_slow);
11872 %}
11873 
11874 // Long DIVMOD with Register, both quotient and mod results
11875 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11876                              rFlagsReg cr)
11877 %{
11878   match(DivModL rax div);
11879   effect(KILL cr);
11880 
11881   ins_cost(30*100+10*100); // XXX
11882   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11883             "cmpq    rax, rdx\n\t"
11884             "jne,s   normal\n\t"
11885             "xorl    rdx, rdx\n\t"
11886             "cmpq    $div, -1\n\t"
11887             "je,s    done\n"
11888     "normal: cdqq\n\t"
11889             "idivq   $div\n"
11890     "done:"        %}
11891   ins_encode(cdqq_enc(div));
11892   ins_pipe(pipe_slow);
11893 %}
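
// DivModI/DivModL cover the common "quotient and remainder of the same operands" shape
// with a single idiv, since the instruction already produces both (rax = quotient,
// rdx = remainder). A minimal Java sketch (illustrative only, helper name hypothetical)
// of code C2 can fold into one of these rules:
//
//   static int[] split(int value, int radix) {
//     return new int[] { value / radix, value % radix };   // one idivl for both results
//   }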
11894 
11895 // Unsigned integer DIVMOD with Register, both quotient and mod results
11896 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11897                               no_rax_rdx_RegI div, rFlagsReg cr)
11898 %{
11899   match(UDivModI rax div);
11900   effect(TEMP tmp, KILL cr);
11901 
11902   ins_cost(300);
11903   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11904             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11905           %}
11906   ins_encode %{
11907     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11908   %}
11909   ins_pipe(pipe_slow);
11910 %}
11911 
11912 // Unsigned long DIVMOD with Register, both quotient and mod results
11913 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11914                               no_rax_rdx_RegL div, rFlagsReg cr)
11915 %{
11916   match(UDivModL rax div);
11917   effect(TEMP tmp, KILL cr);
11918 
11919   ins_cost(300);
11920   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11921             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11922           %}
11923   ins_encode %{
11924     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11925   %}
11926   ins_pipe(pipe_slow);
11927 %}
11928 
11929 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11930                    rFlagsReg cr)
11931 %{
11932   match(Set rdx (ModI rax div));
11933   effect(KILL rax, KILL cr);
11934 
11935   ins_cost(300); // XXX
11936   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11937             "jne,s   normal\n\t"
11938             "xorl    rdx, rdx\n\t"
11939             "cmpl    $div, -1\n\t"
11940             "je,s    done\n"
11941     "normal: cdql\n\t"
11942             "idivl   $div\n"
11943     "done:"        %}
11944   ins_encode(cdql_enc(div));
11945   ins_pipe(ialu_reg_reg_alu0);
11946 %}
11947 
11948 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11949                    rFlagsReg cr)
11950 %{
11951   match(Set rdx (ModL rax div));
11952   effect(KILL rax, KILL cr);
11953 
11954   ins_cost(300); // XXX
11955   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11956             "cmpq    rax, rdx\n\t"
11957             "jne,s   normal\n\t"
11958             "xorl    rdx, rdx\n\t"
11959             "cmpq    $div, -1\n\t"
11960             "je,s    done\n"
11961     "normal: cdqq\n\t"
11962             "idivq   $div\n"
11963     "done:"        %}
11964   ins_encode(cdqq_enc(div));
11965   ins_pipe(ialu_reg_reg_alu0);
11966 %}
11967 
11968 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11969 %{
11970   match(Set rdx (UModI rax div));
11971   effect(KILL rax, KILL cr);
11972 
11973   ins_cost(300);
11974   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11975   ins_encode %{
11976     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11977   %}
11978   ins_pipe(ialu_reg_reg_alu0);
11979 %}
11980 
11981 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11982 %{
11983   match(Set rdx (UModL rax div));
11984   effect(KILL rax, KILL cr);
11985 
11986   ins_cost(300);
11987   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11988   ins_encode %{
11989     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11990   %}
11991   ins_pipe(ialu_reg_reg_alu0);
11992 %}
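
// The unsigned div/mod rules typically come from the Integer/Long divideUnsigned and
// remainderUnsigned intrinsics; unlike the signed forms there is no MIN_VALUE / -1
// overflow case to guard. A minimal Java sketch (illustrative only):
//
//   static long bucket(long hash, long nBuckets) {
//     return Long.remainderUnsigned(hash, nBuckets);   // -> UModL, result in rdx
//   }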
11993 
11994 // Integer Shift Instructions
11995 // Shift Left by one, two, three
11996 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11997 %{
11998   predicate(!UseAPX);
11999   match(Set dst (LShiftI dst shift));
12000   effect(KILL cr);
12001 
12002   format %{ "sall    $dst, $shift" %}
12003   ins_encode %{
12004     __ sall($dst$$Register, $shift$$constant);
12005   %}
12006   ins_pipe(ialu_reg);
12007 %}
12008 
12009 // Shift Left by one, two, three
12010 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
12011 %{
12012   predicate(UseAPX);
12013   match(Set dst (LShiftI src shift));
12014   effect(KILL cr);
12015   flag(PD::Flag_ndd_demotable_opr1);
12016 
12017   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
12018   ins_encode %{
12019     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12020   %}
12021   ins_pipe(ialu_reg);
12022 %}
12023 
12024 // Shift Left by 8-bit immediate
12025 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12026 %{
12027   predicate(!UseAPX);
12028   match(Set dst (LShiftI dst shift));
12029   effect(KILL cr);
12030 
12031   format %{ "sall    $dst, $shift" %}
12032   ins_encode %{
12033     __ sall($dst$$Register, $shift$$constant);
12034   %}
12035   ins_pipe(ialu_reg);
12036 %}
12037 
12038 // Shift Left by 8-bit immediate
12039 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12040 %{
12041   predicate(UseAPX);
12042   match(Set dst (LShiftI src shift));
12043   effect(KILL cr);
12044   flag(PD::Flag_ndd_demotable_opr1);
12045 
12046   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
12047   ins_encode %{
12048     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12049   %}
12050   ins_pipe(ialu_reg);
12051 %}
12052 
12053 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12054 %{
12055   predicate(UseAPX);
12056   match(Set dst (LShiftI (LoadI src) shift));
12057   effect(KILL cr);
12058 
12059   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
12060   ins_encode %{
12061     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
12062   %}
12063   ins_pipe(ialu_reg);
12064 %}
12065 
12066 // Shift Left by 8-bit immediate
12067 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12068 %{
12069   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12070   effect(KILL cr);
12071 
12072   format %{ "sall    $dst, $shift" %}
12073   ins_encode %{
12074     __ sall($dst$$Address, $shift$$constant);
12075   %}
12076   ins_pipe(ialu_mem_imm);
12077 %}
12078 
12079 // Shift Left by variable
12080 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12081 %{
12082   predicate(!VM_Version::supports_bmi2());
12083   match(Set dst (LShiftI dst shift));
12084   effect(KILL cr);
12085 
12086   format %{ "sall    $dst, $shift" %}
12087   ins_encode %{
12088     __ sall($dst$$Register);
12089   %}
12090   ins_pipe(ialu_reg_reg);
12091 %}
12092 
12093 // Shift Left by variable
12094 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12095 %{
12096   predicate(!VM_Version::supports_bmi2());
12097   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12098   effect(KILL cr);
12099 
12100   format %{ "sall    $dst, $shift" %}
12101   ins_encode %{
12102     __ sall($dst$$Address);
12103   %}
12104   ins_pipe(ialu_mem_reg);
12105 %}
12106 
12107 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12108 %{
12109   predicate(VM_Version::supports_bmi2());
12110   match(Set dst (LShiftI src shift));
12111 
12112   format %{ "shlxl   $dst, $src, $shift" %}
12113   ins_encode %{
12114     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12115   %}
12116   ins_pipe(ialu_reg_reg);
12117 %}
12118 
12119 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12120 %{
12121   predicate(VM_Version::supports_bmi2());
12122   match(Set dst (LShiftI (LoadI src) shift));
12123   ins_cost(175);
12124   format %{ "shlxl   $dst, $src, $shift" %}
12125   ins_encode %{
12126     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12127   %}
12128   ins_pipe(ialu_reg_mem);
12129 %}
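
// With BMI2, shlx takes its shift count in any general register and leaves the flags
// alone, so the two rules above drop both the rcx constraint and the KILL cr effect of
// the legacy sall forms. A minimal Java sketch (illustrative only):
//
//   static int scale(int x, int n) {
//     return x << n;   // BMI2: shlxl dst, x, n  (count need not be in ecx, flags preserved)
//   }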
12130 
12131 // Arithmetic Shift Right by 8-bit immediate
12132 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12133 %{
12134   predicate(!UseAPX);
12135   match(Set dst (RShiftI dst shift));
12136   effect(KILL cr);
12137 
12138   format %{ "sarl    $dst, $shift" %}
12139   ins_encode %{
12140     __ sarl($dst$$Register, $shift$$constant);
12141   %}
12142   ins_pipe(ialu_mem_imm);
12143 %}
12144 
12145 // Arithmetic Shift Right by 8-bit immediate
12146 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12147 %{
12148   predicate(UseAPX);
12149   match(Set dst (RShiftI src shift));
12150   effect(KILL cr);
12151   flag(PD::Flag_ndd_demotable_opr1);
12152 
12153   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12154   ins_encode %{
12155     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12156   %}
12157   ins_pipe(ialu_mem_imm);
12158 %}
12159 
12160 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12161 %{
12162   predicate(UseAPX);
12163   match(Set dst (RShiftI (LoadI src) shift));
12164   effect(KILL cr);
12165 
12166   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12167   ins_encode %{
12168     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12169   %}
12170   ins_pipe(ialu_mem_imm);
12171 %}
12172 
12173 // Arithmetic Shift Right by 8-bit immediate
12174 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12175 %{
12176   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12177   effect(KILL cr);
12178 
12179   format %{ "sarl    $dst, $shift" %}
12180   ins_encode %{
12181     __ sarl($dst$$Address, $shift$$constant);
12182   %}
12183   ins_pipe(ialu_mem_imm);
12184 %}
12185 
12186 // Arithmetic Shift Right by variable
12187 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12188 %{
12189   predicate(!VM_Version::supports_bmi2());
12190   match(Set dst (RShiftI dst shift));
12191   effect(KILL cr);
12192 
12193   format %{ "sarl    $dst, $shift" %}
12194   ins_encode %{
12195     __ sarl($dst$$Register);
12196   %}
12197   ins_pipe(ialu_reg_reg);
12198 %}
12199 
12200 // Arithmetic Shift Right by variable
12201 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12202 %{
12203   predicate(!VM_Version::supports_bmi2());
12204   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12205   effect(KILL cr);
12206 
12207   format %{ "sarl    $dst, $shift" %}
12208   ins_encode %{
12209     __ sarl($dst$$Address);
12210   %}
12211   ins_pipe(ialu_mem_reg);
12212 %}
12213 
12214 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12215 %{
12216   predicate(VM_Version::supports_bmi2());
12217   match(Set dst (RShiftI src shift));
12218 
12219   format %{ "sarxl   $dst, $src, $shift" %}
12220   ins_encode %{
12221     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12222   %}
12223   ins_pipe(ialu_reg_reg);
12224 %}
12225 
12226 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12227 %{
12228   predicate(VM_Version::supports_bmi2());
12229   match(Set dst (RShiftI (LoadI src) shift));
12230   ins_cost(175);
12231   format %{ "sarxl   $dst, $src, $shift" %}
12232   ins_encode %{
12233     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12234   %}
12235   ins_pipe(ialu_reg_mem);
12236 %}
12237 
12238 // Logical Shift Right by 8-bit immediate
12239 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12240 %{
12241   predicate(!UseAPX);
12242   match(Set dst (URShiftI dst shift));
12243   effect(KILL cr);
12244 
12245   format %{ "shrl    $dst, $shift" %}
12246   ins_encode %{
12247     __ shrl($dst$$Register, $shift$$constant);
12248   %}
12249   ins_pipe(ialu_reg);
12250 %}
12251 
12252 // Logical Shift Right by 8-bit immediate
12253 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12254 %{
12255   predicate(UseAPX);
12256   match(Set dst (URShiftI src shift));
12257   effect(KILL cr);
12258   flag(PD::Flag_ndd_demotable_opr1);
12259 
12260   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12261   ins_encode %{
12262     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12263   %}
12264   ins_pipe(ialu_reg);
12265 %}
12266 
12267 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12268 %{
12269   predicate(UseAPX);
12270   match(Set dst (URShiftI (LoadI src) shift));
12271   effect(KILL cr);
12272 
12273   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12274   ins_encode %{
12275     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12276   %}
12277   ins_pipe(ialu_reg);
12278 %}
12279 
12280 // Logical Shift Right by 8-bit immediate
12281 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12282 %{
12283   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12284   effect(KILL cr);
12285 
12286   format %{ "shrl    $dst, $shift" %}
12287   ins_encode %{
12288     __ shrl($dst$$Address, $shift$$constant);
12289   %}
12290   ins_pipe(ialu_mem_imm);
12291 %}
12292 
12293 // Logical Shift Right by variable
12294 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12295 %{
12296   predicate(!VM_Version::supports_bmi2());
12297   match(Set dst (URShiftI dst shift));
12298   effect(KILL cr);
12299 
12300   format %{ "shrl    $dst, $shift" %}
12301   ins_encode %{
12302     __ shrl($dst$$Register);
12303   %}
12304   ins_pipe(ialu_reg_reg);
12305 %}
12306 
12307 // Logical Shift Right by variable
12308 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12309 %{
12310   predicate(!VM_Version::supports_bmi2());
12311   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12312   effect(KILL cr);
12313 
12314   format %{ "shrl    $dst, $shift" %}
12315   ins_encode %{
12316     __ shrl($dst$$Address);
12317   %}
12318   ins_pipe(ialu_mem_reg);
12319 %}
12320 
12321 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12322 %{
12323   predicate(VM_Version::supports_bmi2());
12324   match(Set dst (URShiftI src shift));
12325 
12326   format %{ "shrxl   $dst, $src, $shift" %}
12327   ins_encode %{
12328     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12329   %}
12330   ins_pipe(ialu_reg_reg);
12331 %}
12332 
12333 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12334 %{
12335   predicate(VM_Version::supports_bmi2());
12336   match(Set dst (URShiftI (LoadI src) shift));
12337   ins_cost(175);
12338   format %{ "shrxl   $dst, $src, $shift" %}
12339   ins_encode %{
12340     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12341   %}
12342   ins_pipe(ialu_reg_mem);
12343 %}
12344 
12345 // Long Shift Instructions
12346 // Shift Left by one, two, three
12347 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12348 %{
12349   predicate(!UseAPX);
12350   match(Set dst (LShiftL dst shift));
12351   effect(KILL cr);
12352 
12353   format %{ "salq    $dst, $shift" %}
12354   ins_encode %{
12355     __ salq($dst$$Register, $shift$$constant);
12356   %}
12357   ins_pipe(ialu_reg);
12358 %}
12359 
12360 // Shift Left by one, two, three
12361 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12362 %{
12363   predicate(UseAPX);
12364   match(Set dst (LShiftL src shift));
12365   effect(KILL cr);
12366   flag(PD::Flag_ndd_demotable_opr1);
12367 
12368   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12369   ins_encode %{
12370     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12371   %}
12372   ins_pipe(ialu_reg);
12373 %}
12374 
12375 // Shift Left by 8-bit immediate
12376 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12377 %{
12378   predicate(!UseAPX);
12379   match(Set dst (LShiftL dst shift));
12380   effect(KILL cr);
12381 
12382   format %{ "salq    $dst, $shift" %}
12383   ins_encode %{
12384     __ salq($dst$$Register, $shift$$constant);
12385   %}
12386   ins_pipe(ialu_reg);
12387 %}
12388 
12389 // Shift Left by 8-bit immediate
12390 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12391 %{
12392   predicate(UseAPX);
12393   match(Set dst (LShiftL src shift));
12394   effect(KILL cr);
12395   flag(PD::Flag_ndd_demotable_opr1);
12396 
12397   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12398   ins_encode %{
12399     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12400   %}
12401   ins_pipe(ialu_reg);
12402 %}
12403 
12404 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12405 %{
12406   predicate(UseAPX);
12407   match(Set dst (LShiftL (LoadL src) shift));
12408   effect(KILL cr);
12409 
12410   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12411   ins_encode %{
12412     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12413   %}
12414   ins_pipe(ialu_reg);
12415 %}
12416 
12417 // Shift Left by 8-bit immediate
12418 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12419 %{
12420   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12421   effect(KILL cr);
12422 
12423   format %{ "salq    $dst, $shift" %}
12424   ins_encode %{
12425     __ salq($dst$$Address, $shift$$constant);
12426   %}
12427   ins_pipe(ialu_mem_imm);
12428 %}
12429 
12430 // Shift Left by variable
12431 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12432 %{
12433   predicate(!VM_Version::supports_bmi2());
12434   match(Set dst (LShiftL dst shift));
12435   effect(KILL cr);
12436 
12437   format %{ "salq    $dst, $shift" %}
12438   ins_encode %{
12439     __ salq($dst$$Register);
12440   %}
12441   ins_pipe(ialu_reg_reg);
12442 %}
12443 
12444 // Shift Left by variable
12445 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12446 %{
12447   predicate(!VM_Version::supports_bmi2());
12448   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12449   effect(KILL cr);
12450 
12451   format %{ "salq    $dst, $shift" %}
12452   ins_encode %{
12453     __ salq($dst$$Address);
12454   %}
12455   ins_pipe(ialu_mem_reg);
12456 %}
12457 
12458 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12459 %{
12460   predicate(VM_Version::supports_bmi2());
12461   match(Set dst (LShiftL src shift));
12462 
12463   format %{ "shlxq   $dst, $src, $shift" %}
12464   ins_encode %{
12465     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12466   %}
12467   ins_pipe(ialu_reg_reg);
12468 %}
12469 
12470 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12471 %{
12472   predicate(VM_Version::supports_bmi2());
12473   match(Set dst (LShiftL (LoadL src) shift));
12474   ins_cost(175);
12475   format %{ "shlxq   $dst, $src, $shift" %}
12476   ins_encode %{
12477     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12478   %}
12479   ins_pipe(ialu_reg_mem);
12480 %}
12481 
12482 // Arithmetic Shift Right by 8-bit immediate
12483 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12484 %{
12485   predicate(!UseAPX);
12486   match(Set dst (RShiftL dst shift));
12487   effect(KILL cr);
12488 
12489   format %{ "sarq    $dst, $shift" %}
12490   ins_encode %{
12491     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12492   %}
12493   ins_pipe(ialu_mem_imm);
12494 %}
12495 
12496 // Arithmetic Shift Right by 8-bit immediate
12497 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12498 %{
12499   predicate(UseAPX);
12500   match(Set dst (RShiftL src shift));
12501   effect(KILL cr);
12502   flag(PD::Flag_ndd_demotable_opr1);
12503 
12504   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12505   ins_encode %{
12506     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12507   %}
12508   ins_pipe(ialu_mem_imm);
12509 %}
12510 
12511 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12512 %{
12513   predicate(UseAPX);
12514   match(Set dst (RShiftL (LoadL src) shift));
12515   effect(KILL cr);
12516 
12517   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12518   ins_encode %{
12519     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12520   %}
12521   ins_pipe(ialu_mem_imm);
12522 %}
12523 
12524 // Arithmetic Shift Right by 8-bit immediate
12525 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12526 %{
12527   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12528   effect(KILL cr);
12529 
12530   format %{ "sarq    $dst, $shift" %}
12531   ins_encode %{
12532     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12533   %}
12534   ins_pipe(ialu_mem_imm);
12535 %}
12536 
12537 // Arithmetic Shift Right by variable
12538 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12539 %{
12540   predicate(!VM_Version::supports_bmi2());
12541   match(Set dst (RShiftL dst shift));
12542   effect(KILL cr);
12543 
12544   format %{ "sarq    $dst, $shift" %}
12545   ins_encode %{
12546     __ sarq($dst$$Register);
12547   %}
12548   ins_pipe(ialu_reg_reg);
12549 %}
12550 
12551 // Arithmetic Shift Right by variable
12552 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12553 %{
12554   predicate(!VM_Version::supports_bmi2());
12555   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12556   effect(KILL cr);
12557 
12558   format %{ "sarq    $dst, $shift" %}
12559   ins_encode %{
12560     __ sarq($dst$$Address);
12561   %}
12562   ins_pipe(ialu_mem_reg);
12563 %}
12564 
12565 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12566 %{
12567   predicate(VM_Version::supports_bmi2());
12568   match(Set dst (RShiftL src shift));
12569 
12570   format %{ "sarxq   $dst, $src, $shift" %}
12571   ins_encode %{
12572     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12573   %}
12574   ins_pipe(ialu_reg_reg);
12575 %}
12576 
12577 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12578 %{
12579   predicate(VM_Version::supports_bmi2());
12580   match(Set dst (RShiftL (LoadL src) shift));
12581   ins_cost(175);
12582   format %{ "sarxq   $dst, $src, $shift" %}
12583   ins_encode %{
12584     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12585   %}
12586   ins_pipe(ialu_reg_mem);
12587 %}
12588 
12589 // Logical Shift Right by 8-bit immediate
12590 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12591 %{
12592   predicate(!UseAPX);
12593   match(Set dst (URShiftL dst shift));
12594   effect(KILL cr);
12595 
12596   format %{ "shrq    $dst, $shift" %}
12597   ins_encode %{
12598     __ shrq($dst$$Register, $shift$$constant);
12599   %}
12600   ins_pipe(ialu_reg);
12601 %}
12602 
12603 // Logical Shift Right by 8-bit immediate
12604 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12605 %{
12606   predicate(UseAPX);
12607   match(Set dst (URShiftL src shift));
12608   effect(KILL cr);
12609   flag(PD::Flag_ndd_demotable_opr1);
12610 
12611   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12612   ins_encode %{
12613     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12614   %}
12615   ins_pipe(ialu_reg);
12616 %}
12617 
12618 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12619 %{
12620   predicate(UseAPX);
12621   match(Set dst (URShiftL (LoadL src) shift));
12622   effect(KILL cr);
12623 
12624   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12625   ins_encode %{
12626     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12627   %}
12628   ins_pipe(ialu_reg);
12629 %}
12630 
12631 // Logical Shift Right by 8-bit immediate
12632 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12633 %{
12634   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12635   effect(KILL cr);
12636 
12637   format %{ "shrq    $dst, $shift" %}
12638   ins_encode %{
12639     __ shrq($dst$$Address, $shift$$constant);
12640   %}
12641   ins_pipe(ialu_mem_imm);
12642 %}
12643 
12644 // Logical Shift Right by variable
12645 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12646 %{
12647   predicate(!VM_Version::supports_bmi2());
12648   match(Set dst (URShiftL dst shift));
12649   effect(KILL cr);
12650 
12651   format %{ "shrq    $dst, $shift" %}
12652   ins_encode %{
12653     __ shrq($dst$$Register);
12654   %}
12655   ins_pipe(ialu_reg_reg);
12656 %}
12657 
12658 // Logical Shift Right by variable
12659 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12660 %{
12661   predicate(!VM_Version::supports_bmi2());
12662   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12663   effect(KILL cr);
12664 
12665   format %{ "shrq    $dst, $shift" %}
12666   ins_encode %{
12667     __ shrq($dst$$Address);
12668   %}
12669   ins_pipe(ialu_mem_reg);
12670 %}
12671 
12672 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12673 %{
12674   predicate(VM_Version::supports_bmi2());
12675   match(Set dst (URShiftL src shift));
12676 
12677   format %{ "shrxq   $dst, $src, $shift" %}
12678   ins_encode %{
12679     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12680   %}
12681   ins_pipe(ialu_reg_reg);
12682 %}
12683 
12684 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12685 %{
12686   predicate(VM_Version::supports_bmi2());
12687   match(Set dst (URShiftL (LoadL src) shift));
12688   ins_cost(175);
12689   format %{ "shrxq   $dst, $src, $shift" %}
12690   ins_encode %{
12691     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12692   %}
12693   ins_pipe(ialu_reg_mem);
12694 %}
12695 
12696 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12697 // This idiom is used by the compiler for the i2b bytecode.
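      // For example, the Java expression (x << 24) >> 24 sign-extends the low byte of x
      // and is matched here as a single movsbl.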
12698 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12699 %{
12700   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12701 
12702   format %{ "movsbl  $dst, $src\t# i2b" %}
12703   ins_encode %{
12704     __ movsbl($dst$$Register, $src$$Register);
12705   %}
12706   ins_pipe(ialu_reg_reg);
12707 %}
12708 
12709 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12710 // This idiom is used by the compiler for the i2s bytecode.
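      // For example, (x << 16) >> 16 sign-extends the low 16 bits and becomes a single movswl.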
12711 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12712 %{
12713   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12714 
12715   format %{ "movswl  $dst, $src\t# i2s" %}
12716   ins_encode %{
12717     __ movswl($dst$$Register, $src$$Register);
12718   %}
12719   ins_pipe(ialu_reg_reg);
12720 %}
12721 
12722 // ROL/ROR instructions
12723 
12724 // Rotate left by constant.
12725 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12726 %{
12727   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12728   match(Set dst (RotateLeft dst shift));
12729   effect(KILL cr);
12730   format %{ "roll    $dst, $shift" %}
12731   ins_encode %{
12732     __ roll($dst$$Register, $shift$$constant);
12733   %}
12734   ins_pipe(ialu_reg);
12735 %}
12736 
12737 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12738 %{
12739   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12740   match(Set dst (RotateLeft src shift));
12741   format %{ "rolxl   $dst, $src, $shift" %}
12742   ins_encode %{
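          // rol(x, s) == ror(x, (32 - s) & 31), so the rotate-left is emitted as a
          // rorxl with the complemented count; rorx also leaves the flags untouched.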
12743     int shift = 32 - ($shift$$constant & 31);
12744     __ rorxl($dst$$Register, $src$$Register, shift);
12745   %}
12746   ins_pipe(ialu_reg_reg);
12747 %}
12748 
12749 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12750 %{
12751   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12752   match(Set dst (RotateLeft (LoadI src) shift));
12753   ins_cost(175);
12754   format %{ "rolxl   $dst, $src, $shift" %}
12755   ins_encode %{
12756     int shift = 32 - ($shift$$constant & 31);
12757     __ rorxl($dst$$Register, $src$$Address, shift);
12758   %}
12759   ins_pipe(ialu_reg_mem);
12760 %}
12761 
12762 // Rotate Left by variable
12763 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12764 %{
12765   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12766   match(Set dst (RotateLeft dst shift));
12767   effect(KILL cr);
12768   format %{ "roll    $dst, $shift" %}
12769   ins_encode %{
12770     __ roll($dst$$Register);
12771   %}
12772   ins_pipe(ialu_reg_reg);
12773 %}
12774 
12775 // Rotate Left by variable
12776 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12777 %{
12778   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12779   match(Set dst (RotateLeft src shift));
12780   effect(KILL cr);
12781   flag(PD::Flag_ndd_demotable_opr1);
12782 
12783   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12784   ins_encode %{
12785     __ eroll($dst$$Register, $src$$Register, false);
12786   %}
12787   ins_pipe(ialu_reg_reg);
12788 %}
12789 
12790 // Rotate Right by constant.
12791 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12792 %{
12793   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12794   match(Set dst (RotateRight dst shift));
12795   effect(KILL cr);
12796   format %{ "rorl    $dst, $shift" %}
12797   ins_encode %{
12798     __ rorl($dst$$Register, $shift$$constant);
12799   %}
12800   ins_pipe(ialu_reg);
12801 %}
12802 
12803 // Rotate Right by constant.
12804 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12805 %{
12806   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12807   match(Set dst (RotateRight src shift));
12808   format %{ "rorxl   $dst, $src, $shift" %}
12809   ins_encode %{
12810     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12811   %}
12812   ins_pipe(ialu_reg_reg);
12813 %}
12814 
12815 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12816 %{
12817   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12818   match(Set dst (RotateRight (LoadI src) shift));
12819   ins_cost(175);
12820   format %{ "rorxl   $dst, $src, $shift" %}
12821   ins_encode %{
12822     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12823   %}
12824   ins_pipe(ialu_reg_mem);
12825 %}
12826 
12827 // Rotate Right by variable
12828 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12829 %{
12830   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12831   match(Set dst (RotateRight dst shift));
12832   effect(KILL cr);
12833   format %{ "rorl    $dst, $shift" %}
12834   ins_encode %{
12835     __ rorl($dst$$Register);
12836   %}
12837   ins_pipe(ialu_reg_reg);
12838 %}
12839 
12840 // Rotate Right by variable
12841 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12842 %{
12843   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12844   match(Set dst (RotateRight src shift));
12845   effect(KILL cr);
12846   flag(PD::Flag_ndd_demotable_opr1);
12847 
12848   format %{ "erorl    $dst, $src, $shift\t# rotate right (int ndd)" %}
12849   ins_encode %{
12850     __ erorl($dst$$Register, $src$$Register, false);
12851   %}
12852   ins_pipe(ialu_reg_reg);
12853 %}
12854 
12855 // Rotate Left by constant.
12856 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12857 %{
12858   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12859   match(Set dst (RotateLeft dst shift));
12860   effect(KILL cr);
12861   format %{ "rolq    $dst, $shift" %}
12862   ins_encode %{
12863     __ rolq($dst$$Register, $shift$$constant);
12864   %}
12865   ins_pipe(ialu_reg);
12866 %}
12867 
12868 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12869 %{
12870   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12871   match(Set dst (RotateLeft src shift));
12872   format %{ "rolxq   $dst, $src, $shift" %}
12873   ins_encode %{
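          // Same trick as the 32-bit form: rol(x, s) == ror(x, (64 - s) & 63), so a
          // flag-preserving rorxq with the complemented count implements the rotate-left.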
12874     int shift = 64 - ($shift$$constant & 63);
12875     __ rorxq($dst$$Register, $src$$Register, shift);
12876   %}
12877   ins_pipe(ialu_reg_reg);
12878 %}
12879 
12880 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12881 %{
12882   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12883   match(Set dst (RotateLeft (LoadL src) shift));
12884   ins_cost(175);
12885   format %{ "rolxq   $dst, $src, $shift" %}
12886   ins_encode %{
12887     int shift = 64 - ($shift$$constant & 63);
12888     __ rorxq($dst$$Register, $src$$Address, shift);
12889   %}
12890   ins_pipe(ialu_reg_mem);
12891 %}
12892 
12893 // Rotate Left by variable
12894 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12895 %{
12896   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12897   match(Set dst (RotateLeft dst shift));
12898   effect(KILL cr);
12899 
12900   format %{ "rolq    $dst, $shift" %}
12901   ins_encode %{
12902     __ rolq($dst$$Register);
12903   %}
12904   ins_pipe(ialu_reg_reg);
12905 %}
12906 
12907 // Rotate Left by variable
12908 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12909 %{
12910   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12911   match(Set dst (RotateLeft src shift));
12912   effect(KILL cr);
12913   flag(PD::Flag_ndd_demotable_opr1);
12914 
12915   format %{ "erolq    $dst, $src, $shift\t# rotate left (long ndd)" %}
12916   ins_encode %{
12917     __ erolq($dst$$Register, $src$$Register, false);
12918   %}
12919   ins_pipe(ialu_reg_reg);
12920 %}
12921 
12922 // Rotate Right by constant.
12923 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12924 %{
12925   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12926   match(Set dst (RotateRight dst shift));
12927   effect(KILL cr);
12928   format %{ "rorq    $dst, $shift" %}
12929   ins_encode %{
12930     __ rorq($dst$$Register, $shift$$constant);
12931   %}
12932   ins_pipe(ialu_reg);
12933 %}
12934 
12935 // Rotate Right by constant
12936 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12937 %{
12938   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12939   match(Set dst (RotateRight src shift));
12940   format %{ "rorxq   $dst, $src, $shift" %}
12941   ins_encode %{
12942     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12943   %}
12944   ins_pipe(ialu_reg_reg);
12945 %}
12946 
12947 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12948 %{
12949   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12950   match(Set dst (RotateRight (LoadL src) shift));
12951   ins_cost(175);
12952   format %{ "rorxq   $dst, $src, $shift" %}
12953   ins_encode %{
12954     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12955   %}
12956   ins_pipe(ialu_reg_mem);
12957 %}
12958 
12959 // Rotate Right by variable
12960 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12961 %{
12962   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12963   match(Set dst (RotateRight dst shift));
12964   effect(KILL cr);
12965   format %{ "rorq    $dst, $shift" %}
12966   ins_encode %{
12967     __ rorq($dst$$Register);
12968   %}
12969   ins_pipe(ialu_reg_reg);
12970 %}
12971 
12972 // Rotate Right by variable
12973 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12974 %{
12975   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12976   match(Set dst (RotateRight src shift));
12977   effect(KILL cr);
12978   flag(PD::Flag_ndd_demotable_opr1);
12979 
12980   format %{ "erorq    $dst, $src, $shift\t# rotate right (long ndd)" %}
12981   ins_encode %{
12982     __ erorq($dst$$Register, $src$$Register, false);
12983   %}
12984   ins_pipe(ialu_reg_reg);
12985 %}
12986 
12987 //----------------------------- CompressBits/ExpandBits ------------------------
12988 
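      // pext gathers the bits of src selected by mask into the low-order bits of dst;
      // pdep scatters the low-order bits of src to the bit positions set in mask.
      // For example, pext(0b1010, mask=0b1010) == 0b11 and pdep(0b11, mask=0b1010) == 0b1010.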
12989 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12990   predicate(n->bottom_type()->isa_long());
12991   match(Set dst (CompressBits src mask));
12992   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12993   ins_encode %{
12994     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12995   %}
12996   ins_pipe( pipe_slow );
12997 %}
12998 
12999 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
13000   predicate(n->bottom_type()->isa_long());
13001   match(Set dst (ExpandBits src mask));
13002   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
13003   ins_encode %{
13004     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
13005   %}
13006   ins_pipe( pipe_slow );
13007 %}
13008 
13009 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
13010   predicate(n->bottom_type()->isa_long());
13011   match(Set dst (CompressBits src (LoadL mask)));
13012   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
13013   ins_encode %{
13014     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
13015   %}
13016   ins_pipe( pipe_slow );
13017 %}
13018 
13019 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
13020   predicate(n->bottom_type()->isa_long());
13021   match(Set dst (ExpandBits src (LoadL mask)));
13022   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
13023   ins_encode %{
13024     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
13025   %}
13026   ins_pipe( pipe_slow );
13027 %}
13028 
13029 
13030 // Logical Instructions
13031 
13032 // Integer Logical Instructions
13033 
13034 // And Instructions
13035 // And Register with Register
13036 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13037 %{
13038   predicate(!UseAPX);
13039   match(Set dst (AndI dst src));
13040   effect(KILL cr);
13041   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13042 
13043   format %{ "andl    $dst, $src\t# int" %}
13044   ins_encode %{
13045     __ andl($dst$$Register, $src$$Register);
13046   %}
13047   ins_pipe(ialu_reg_reg);
13048 %}
13049 
13050 // And Register with Register using New Data Destination (NDD)
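      // APX NDD encodings take an explicit, possibly distinct destination register, so the
      // three-operand patterns below can produce dst without first copying src1 into it,
      // sparing the move that the legacy two-operand forms would otherwise require.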
13051 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13052 %{
13053   predicate(UseAPX);
13054   match(Set dst (AndI src1 src2));
13055   effect(KILL cr);
13056   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13057 
13058   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
13059   ins_encode %{
13060     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
13061 
13062   %}
13063   ins_pipe(ialu_reg_reg);
13064 %}
13065 
13066 // And Register with Immediate 255
13067 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
13068 %{
13069   match(Set dst (AndI src mask));
13070 
13071   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
13072   ins_encode %{
13073     __ movzbl($dst$$Register, $src$$Register);
13074   %}
13075   ins_pipe(ialu_reg);
13076 %}
13077 
13078 // And Register with Immediate 255 and promote to long
13079 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13080 %{
13081   match(Set dst (ConvI2L (AndI src mask)));
13082 
13083   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13084   ins_encode %{
13085     __ movzbl($dst$$Register, $src$$Register);
13086   %}
13087   ins_pipe(ialu_reg);
13088 %}
13089 
13090 // And Register with Immediate 65535
13091 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13092 %{
13093   match(Set dst (AndI src mask));
13094 
13095   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13096   ins_encode %{
13097     __ movzwl($dst$$Register, $src$$Register);
13098   %}
13099   ins_pipe(ialu_reg);
13100 %}
13101 
13102 // And Register with Immediate 65535 and promote to long
13103 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13104 %{
13105   match(Set dst (ConvI2L (AndI src mask)));
13106 
13107   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13108   ins_encode %{
13109     __ movzwl($dst$$Register, $src$$Register);
13110   %}
13111   ins_pipe(ialu_reg);
13112 %}
13113 
13114 // Can skip int2long conversions after AND with small bitmask
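      // bzhi zeroes all bits of src at or above the index held in tmp, so with a mask of
      // the form 2^k - 1 a single bzhiq keeps the low k bits and clears bits k..63,
      // performing the AND and the zero-extension to long in one step.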
13115 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13116 %{
13117   predicate(VM_Version::supports_bmi2());
13118   ins_cost(125);
13119   effect(TEMP tmp, KILL cr);
13120   match(Set dst (ConvI2L (AndI src mask)));
13121   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13122   ins_encode %{
13123     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13124     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13125   %}
13126   ins_pipe(ialu_reg_reg);
13127 %}
13128 
13129 // And Register with Immediate
13130 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13131 %{
13132   predicate(!UseAPX);
13133   match(Set dst (AndI dst src));
13134   effect(KILL cr);
13135   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13136 
13137   format %{ "andl    $dst, $src\t# int" %}
13138   ins_encode %{
13139     __ andl($dst$$Register, $src$$constant);
13140   %}
13141   ins_pipe(ialu_reg);
13142 %}
13143 
13144 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13145 %{
13146   predicate(UseAPX);
13147   match(Set dst (AndI src1 src2));
13148   effect(KILL cr);
13149   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13150 
13151   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13152   ins_encode %{
13153     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13154   %}
13155   ins_pipe(ialu_reg);
13156 %}
13157 
13158 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13159 %{
13160   predicate(UseAPX);
13161   match(Set dst (AndI (LoadI src1) src2));
13162   effect(KILL cr);
13163   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13164 
13165   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13166   ins_encode %{
13167     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13168   %}
13169   ins_pipe(ialu_reg);
13170 %}
13171 
13172 // And Register with Memory
13173 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13174 %{
13175   predicate(!UseAPX);
13176   match(Set dst (AndI dst (LoadI src)));
13177   effect(KILL cr);
13178   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13179 
13180   ins_cost(150);
13181   format %{ "andl    $dst, $src\t# int" %}
13182   ins_encode %{
13183     __ andl($dst$$Register, $src$$Address);
13184   %}
13185   ins_pipe(ialu_reg_mem);
13186 %}
13187 
13188 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13189 %{
13190   predicate(UseAPX);
13191   match(Set dst (AndI src1 (LoadI src2)));
13192   effect(KILL cr);
13193   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13194 
13195   ins_cost(150);
13196   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13197   ins_encode %{
13198     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13199   %}
13200   ins_pipe(ialu_reg_mem);
13201 %}
13202 
13203 // And Memory with Register
13204 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13205 %{
13206   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13207   effect(KILL cr);
13208   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13209 
13210   ins_cost(150);
13211   format %{ "andb    $dst, $src\t# byte" %}
13212   ins_encode %{
13213     __ andb($dst$$Address, $src$$Register);
13214   %}
13215   ins_pipe(ialu_mem_reg);
13216 %}
13217 
13218 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13219 %{
13220   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13221   effect(KILL cr);
13222   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13223 
13224   ins_cost(150);
13225   format %{ "andl    $dst, $src\t# int" %}
13226   ins_encode %{
13227     __ andl($dst$$Address, $src$$Register);
13228   %}
13229   ins_pipe(ialu_mem_reg);
13230 %}
13231 
13232 // And Memory with Immediate
13233 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13234 %{
13235   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13236   effect(KILL cr);
13237   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13238 
13239   ins_cost(125);
13240   format %{ "andl    $dst, $src\t# int" %}
13241   ins_encode %{
13242     __ andl($dst$$Address, $src$$constant);
13243   %}
13244   ins_pipe(ialu_mem_imm);
13245 %}
13246 
13247 // BMI1 instructions
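      // These match the standard BMI1 idioms on int operands:
      //   andn:   ~x & y                     blsi: x & -x   (isolate lowest set bit)
      //   blsmsk: x ^ (x - 1) (mask through the lowest set bit)
      //   blsr:   x & (x - 1) (clear the lowest set bit)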
13248 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13249   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13250   predicate(UseBMI1Instructions);
13251   effect(KILL cr);
13252   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13253 
13254   ins_cost(125);
13255   format %{ "andnl  $dst, $src1, $src2" %}
13256 
13257   ins_encode %{
13258     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13259   %}
13260   ins_pipe(ialu_reg_mem);
13261 %}
13262 
13263 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13264   match(Set dst (AndI (XorI src1 minus_1) src2));
13265   predicate(UseBMI1Instructions);
13266   effect(KILL cr);
13267   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13268 
13269   format %{ "andnl  $dst, $src1, $src2" %}
13270 
13271   ins_encode %{
13272     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13273   %}
13274   ins_pipe(ialu_reg);
13275 %}
13276 
13277 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13278   match(Set dst (AndI (SubI imm_zero src) src));
13279   predicate(UseBMI1Instructions);
13280   effect(KILL cr);
13281   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13282 
13283   format %{ "blsil  $dst, $src" %}
13284 
13285   ins_encode %{
13286     __ blsil($dst$$Register, $src$$Register);
13287   %}
13288   ins_pipe(ialu_reg);
13289 %}
13290 
13291 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13292   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13293   predicate(UseBMI1Instructions);
13294   effect(KILL cr);
13295   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13296 
13297   ins_cost(125);
13298   format %{ "blsil  $dst, $src" %}
13299 
13300   ins_encode %{
13301     __ blsil($dst$$Register, $src$$Address);
13302   %}
13303   ins_pipe(ialu_reg_mem);
13304 %}
13305 
13306 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13307 %{
13308   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13309   predicate(UseBMI1Instructions);
13310   effect(KILL cr);
13311   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13312 
13313   ins_cost(125);
13314   format %{ "blsmskl $dst, $src" %}
13315 
13316   ins_encode %{
13317     __ blsmskl($dst$$Register, $src$$Address);
13318   %}
13319   ins_pipe(ialu_reg_mem);
13320 %}
13321 
13322 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13323 %{
13324   match(Set dst (XorI (AddI src minus_1) src));
13325   predicate(UseBMI1Instructions);
13326   effect(KILL cr);
13327   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13328 
13329   format %{ "blsmskl $dst, $src" %}
13330 
13331   ins_encode %{
13332     __ blsmskl($dst$$Register, $src$$Register);
13333   %}
13334 
13335   ins_pipe(ialu_reg);
13336 %}
13337 
13338 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13339 %{
13340   match(Set dst (AndI (AddI src minus_1) src) );
13341   predicate(UseBMI1Instructions);
13342   effect(KILL cr);
13343   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13344 
13345   format %{ "blsrl  $dst, $src" %}
13346 
13347   ins_encode %{
13348     __ blsrl($dst$$Register, $src$$Register);
13349   %}
13350 
13351   ins_pipe(ialu_reg_mem);
13352 %}
13353 
13354 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13355 %{
13356   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13357   predicate(UseBMI1Instructions);
13358   effect(KILL cr);
13359   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13360 
13361   ins_cost(125);
13362   format %{ "blsrl  $dst, $src" %}
13363 
13364   ins_encode %{
13365     __ blsrl($dst$$Register, $src$$Address);
13366   %}
13367 
13368   ins_pipe(ialu_reg);
13369 %}
13370 
13371 // Or Instructions
13372 // Or Register with Register
13373 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13374 %{
13375   predicate(!UseAPX);
13376   match(Set dst (OrI dst src));
13377   effect(KILL cr);
13378   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13379 
13380   format %{ "orl     $dst, $src\t# int" %}
13381   ins_encode %{
13382     __ orl($dst$$Register, $src$$Register);
13383   %}
13384   ins_pipe(ialu_reg_reg);
13385 %}
13386 
13387 // Or Register with Register using New Data Destination (NDD)
13388 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13389 %{
13390   predicate(UseAPX);
13391   match(Set dst (OrI src1 src2));
13392   effect(KILL cr);
13393   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13394 
13395   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13396   ins_encode %{
13397     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13398   %}
13399   ins_pipe(ialu_reg_reg);
13400 %}
13401 
13402 // Or Register with Immediate
13403 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13404 %{
13405   predicate(!UseAPX);
13406   match(Set dst (OrI dst src));
13407   effect(KILL cr);
13408   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13409 
13410   format %{ "orl     $dst, $src\t# int" %}
13411   ins_encode %{
13412     __ orl($dst$$Register, $src$$constant);
13413   %}
13414   ins_pipe(ialu_reg);
13415 %}
13416 
13417 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13418 %{
13419   predicate(UseAPX);
13420   match(Set dst (OrI src1 src2));
13421   effect(KILL cr);
13422   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13423 
13424   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13425   ins_encode %{
13426     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13427   %}
13428   ins_pipe(ialu_reg);
13429 %}
13430 
13431 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13432 %{
13433   predicate(UseAPX);
13434   match(Set dst (OrI src1 src2));
13435   effect(KILL cr);
13436   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13437 
13438   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13439   ins_encode %{
13440     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13441   %}
13442   ins_pipe(ialu_reg);
13443 %}
13444 
13445 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13446 %{
13447   predicate(UseAPX);
13448   match(Set dst (OrI (LoadI src1) src2));
13449   effect(KILL cr);
13450   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13451 
13452   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13453   ins_encode %{
13454     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13455   %}
13456   ins_pipe(ialu_reg);
13457 %}
13458 
13459 // Or Register with Memory
13460 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13461 %{
13462   predicate(!UseAPX);
13463   match(Set dst (OrI dst (LoadI src)));
13464   effect(KILL cr);
13465   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13466 
13467   ins_cost(150);
13468   format %{ "orl     $dst, $src\t# int" %}
13469   ins_encode %{
13470     __ orl($dst$$Register, $src$$Address);
13471   %}
13472   ins_pipe(ialu_reg_mem);
13473 %}
13474 
13475 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13476 %{
13477   predicate(UseAPX);
13478   match(Set dst (OrI src1 (LoadI src2)));
13479   effect(KILL cr);
13480   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13481 
13482   ins_cost(150);
13483   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13484   ins_encode %{
13485     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13486   %}
13487   ins_pipe(ialu_reg_mem);
13488 %}
13489 
13490 // Or Memory with Register
13491 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13492 %{
13493   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13494   effect(KILL cr);
13495   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13496 
13497   ins_cost(150);
13498   format %{ "orb    $dst, $src\t# byte" %}
13499   ins_encode %{
13500     __ orb($dst$$Address, $src$$Register);
13501   %}
13502   ins_pipe(ialu_mem_reg);
13503 %}
13504 
13505 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13506 %{
13507   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13508   effect(KILL cr);
13509   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13510 
13511   ins_cost(150);
13512   format %{ "orl     $dst, $src\t# int" %}
13513   ins_encode %{
13514     __ orl($dst$$Address, $src$$Register);
13515   %}
13516   ins_pipe(ialu_mem_reg);
13517 %}
13518 
13519 // Or Memory with Immediate
13520 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13521 %{
13522   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13523   effect(KILL cr);
13524   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13525 
13526   ins_cost(125);
13527   format %{ "orl     $dst, $src\t# int" %}
13528   ins_encode %{
13529     __ orl($dst$$Address, $src$$constant);
13530   %}
13531   ins_pipe(ialu_mem_imm);
13532 %}
13533 
13534 // Xor Instructions
13535 // Xor Register with Register
13536 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13537 %{
13538   predicate(!UseAPX);
13539   match(Set dst (XorI dst src));
13540   effect(KILL cr);
13541   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13542 
13543   format %{ "xorl    $dst, $src\t# int" %}
13544   ins_encode %{
13545     __ xorl($dst$$Register, $src$$Register);
13546   %}
13547   ins_pipe(ialu_reg_reg);
13548 %}
13549 
13550 // Xor Register with Register using New Data Destination (NDD)
13551 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13552 %{
13553   predicate(UseAPX);
13554   match(Set dst (XorI src1 src2));
13555   effect(KILL cr);
13556   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13557 
13558   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13559   ins_encode %{
13560     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13561   %}
13562   ins_pipe(ialu_reg_reg);
13563 %}
13564 
13565 // Xor Register with Immediate -1
13566 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13567 %{
13568   predicate(!UseAPX);
13569   match(Set dst (XorI dst imm));
13570 
13571   format %{ "notl    $dst" %}
13572   ins_encode %{
13573      __ notl($dst$$Register);
13574   %}
13575   ins_pipe(ialu_reg);
13576 %}
13577 
13578 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13579 %{
13580   match(Set dst (XorI src imm));
13581   predicate(UseAPX);
13582   flag(PD::Flag_ndd_demotable_opr1);
13583 
13584   format %{ "enotl    $dst, $src" %}
13585   ins_encode %{
13586      __ enotl($dst$$Register, $src$$Register);
13587   %}
13588   ins_pipe(ialu_reg);
13589 %}
13590 
13591 // Xor Register with Immediate
13592 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13593 %{
13594   // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
13595   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13596   match(Set dst (XorI dst src));
13597   effect(KILL cr);
13598   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13599 
13600   format %{ "xorl    $dst, $src\t# int" %}
13601   ins_encode %{
13602     __ xorl($dst$$Register, $src$$constant);
13603   %}
13604   ins_pipe(ialu_reg);
13605 %}
13606 
13607 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13608 %{
13609   // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
13610   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13611   match(Set dst (XorI src1 src2));
13612   effect(KILL cr);
13613   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13614 
13615   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13616   ins_encode %{
13617     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13618   %}
13619   ins_pipe(ialu_reg);
13620 %}
13621 
13622 // Xor Memory operand with Immediate into Register (NDD)
13623 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13624 %{
13625   predicate(UseAPX);
13626   match(Set dst (XorI (LoadI src1) src2));
13627   effect(KILL cr);
13628   ins_cost(150);
13629   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13630 
13631   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13632   ins_encode %{
13633     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13634   %}
13635   ins_pipe(ialu_reg);
13636 %}
13637 
13638 // Xor Register with Memory
13639 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13640 %{
13641   predicate(!UseAPX);
13642   match(Set dst (XorI dst (LoadI src)));
13643   effect(KILL cr);
13644   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13645 
13646   ins_cost(150);
13647   format %{ "xorl    $dst, $src\t# int" %}
13648   ins_encode %{
13649     __ xorl($dst$$Register, $src$$Address);
13650   %}
13651   ins_pipe(ialu_reg_mem);
13652 %}
13653 
13654 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13655 %{
13656   predicate(UseAPX);
13657   match(Set dst (XorI src1 (LoadI src2)));
13658   effect(KILL cr);
13659   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13660 
13661   ins_cost(150);
13662   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13663   ins_encode %{
13664     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13665   %}
13666   ins_pipe(ialu_reg_mem);
13667 %}
13668 
13669 // Xor Memory with Register
13670 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13671 %{
13672   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13673   effect(KILL cr);
13674   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13675 
13676   ins_cost(150);
13677   format %{ "xorb    $dst, $src\t# byte" %}
13678   ins_encode %{
13679     __ xorb($dst$$Address, $src$$Register);
13680   %}
13681   ins_pipe(ialu_mem_reg);
13682 %}
13683 
13684 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13685 %{
13686   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13687   effect(KILL cr);
13688   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13689 
13690   ins_cost(150);
13691   format %{ "xorl    $dst, $src\t# int" %}
13692   ins_encode %{
13693     __ xorl($dst$$Address, $src$$Register);
13694   %}
13695   ins_pipe(ialu_mem_reg);
13696 %}
13697 
13698 // Xor Memory with Immediate
13699 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13700 %{
13701   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13702   effect(KILL cr);
13703   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13704 
13705   ins_cost(125);
13706   format %{ "xorl    $dst, $src\t# int" %}
13707   ins_encode %{
13708     __ xorl($dst$$Address, $src$$constant);
13709   %}
13710   ins_pipe(ialu_mem_imm);
13711 %}
13712 
13713 
13714 // Long Logical Instructions
13715 
13716 // And Instructions
13717 // And Register with Register
13718 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13719 %{
13720   predicate(!UseAPX);
13721   match(Set dst (AndL dst src));
13722   effect(KILL cr);
13723   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13724 
13725   format %{ "andq    $dst, $src\t# long" %}
13726   ins_encode %{
13727     __ andq($dst$$Register, $src$$Register);
13728   %}
13729   ins_pipe(ialu_reg_reg);
13730 %}
13731 
13732 // And Register with Register using New Data Destination (NDD)
13733 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13734 %{
13735   predicate(UseAPX);
13736   match(Set dst (AndL src1 src2));
13737   effect(KILL cr);
13738   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13739 
13740   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13741   ins_encode %{
13742     __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13743 
13744   %}
13745   ins_pipe(ialu_reg_reg);
13746 %}
13747 
13748 // And Register with Immediate 255
13749 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13750 %{
13751   match(Set dst (AndL src mask));
13752 
13753   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13754   ins_encode %{
13755     // movzbl zeroes out the upper 32-bit and does not need REX.W
13756     __ movzbl($dst$$Register, $src$$Register);
13757   %}
13758   ins_pipe(ialu_reg);
13759 %}
13760 
13761 // And Register with Immediate 65535
13762 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13763 %{
13764   match(Set dst (AndL src mask));
13765 
13766   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13767   ins_encode %{
13768     // movzwl zeroes out the upper 32-bit and does not need REX.W
13769     __ movzwl($dst$$Register, $src$$Register);
13770   %}
13771   ins_pipe(ialu_reg);
13772 %}
13773 
13774 // And Register with Immediate
13775 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13776 %{
13777   predicate(!UseAPX);
13778   match(Set dst (AndL dst src));
13779   effect(KILL cr);
13780   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13781 
13782   format %{ "andq    $dst, $src\t# long" %}
13783   ins_encode %{
13784     __ andq($dst$$Register, $src$$constant);
13785   %}
13786   ins_pipe(ialu_reg);
13787 %}
13788 
13789 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13790 %{
13791   predicate(UseAPX);
13792   match(Set dst (AndL src1 src2));
13793   effect(KILL cr);
13794   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13795 
13796   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13797   ins_encode %{
13798     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13799   %}
13800   ins_pipe(ialu_reg);
13801 %}
13802 
13803 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13804 %{
13805   predicate(UseAPX);
13806   match(Set dst (AndL (LoadL src1) src2));
13807   effect(KILL cr);
13808   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13809 
13810   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13811   ins_encode %{
13812     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13813   %}
13814   ins_pipe(ialu_reg);
13815 %}
13816 
13817 // And Register with Memory
13818 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13819 %{
13820   predicate(!UseAPX);
13821   match(Set dst (AndL dst (LoadL src)));
13822   effect(KILL cr);
13823   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13824 
13825   ins_cost(150);
13826   format %{ "andq    $dst, $src\t# long" %}
13827   ins_encode %{
13828     __ andq($dst$$Register, $src$$Address);
13829   %}
13830   ins_pipe(ialu_reg_mem);
13831 %}
13832 
13833 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13834 %{
13835   predicate(UseAPX);
13836   match(Set dst (AndL src1 (LoadL src2)));
13837   effect(KILL cr);
13838   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13839 
13840   ins_cost(150);
13841   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13842   ins_encode %{
13843     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13844   %}
13845   ins_pipe(ialu_reg_mem);
13846 %}
13847 
13848 // And Memory with Register
13849 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13850 %{
13851   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13852   effect(KILL cr);
13853   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13854 
13855   ins_cost(150);
13856   format %{ "andq    $dst, $src\t# long" %}
13857   ins_encode %{
13858     __ andq($dst$$Address, $src$$Register);
13859   %}
13860   ins_pipe(ialu_mem_reg);
13861 %}
13862 
13863 // And Memory with Immediate
13864 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13865 %{
13866   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13867   effect(KILL cr);
13868   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13869 
13870   ins_cost(125);
13871   format %{ "andq    $dst, $src\t# long" %}
13872   ins_encode %{
13873     __ andq($dst$$Address, $src$$constant);
13874   %}
13875   ins_pipe(ialu_mem_imm);
13876 %}
13877 
13878 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13879 %{
  // con should be a pure 64-bit immediate whose complement (not(con)) is a power of 2,
  // because AND/OR work well enough for 8/32-bit values.
13882   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13883 
13884   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13885   effect(KILL cr);
13886 
13887   ins_cost(125);
13888   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13889   ins_encode %{
13890     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13891   %}
13892   ins_pipe(ialu_mem_imm);
13893 %}
13894 
13895 // BMI1 instructions
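// The BMI1 patterns below recognise the usual bit-manipulation idioms:
//   andn:   dst = ~src1 & src2
//   blsi:   dst = src & -src        (isolate lowest set bit)
//   blsmsk: dst = src ^ (src - 1)   (mask up to and including lowest set bit)
//   blsr:   dst = src & (src - 1)   (clear lowest set bit)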
13896 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13897   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13898   predicate(UseBMI1Instructions);
13899   effect(KILL cr);
13900   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13901 
13902   ins_cost(125);
13903   format %{ "andnq  $dst, $src1, $src2" %}
13904 
13905   ins_encode %{
13906     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13907   %}
13908   ins_pipe(ialu_reg_mem);
13909 %}
13910 
13911 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13912   match(Set dst (AndL (XorL src1 minus_1) src2));
13913   predicate(UseBMI1Instructions);
13914   effect(KILL cr);
13915   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13916 
13917   format %{ "andnq  $dst, $src1, $src2" %}
13918 
13919   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13921   %}
13922   ins_pipe(ialu_reg_mem);
13923 %}
13924 
13925 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13926   match(Set dst (AndL (SubL imm_zero src) src));
13927   predicate(UseBMI1Instructions);
13928   effect(KILL cr);
13929   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13930 
13931   format %{ "blsiq  $dst, $src" %}
13932 
13933   ins_encode %{
13934     __ blsiq($dst$$Register, $src$$Register);
13935   %}
13936   ins_pipe(ialu_reg);
13937 %}
13938 
13939 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13940   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13941   predicate(UseBMI1Instructions);
13942   effect(KILL cr);
13943   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13944 
13945   ins_cost(125);
13946   format %{ "blsiq  $dst, $src" %}
13947 
13948   ins_encode %{
13949     __ blsiq($dst$$Register, $src$$Address);
13950   %}
13951   ins_pipe(ialu_reg_mem);
13952 %}
13953 
13954 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13955 %{
13956   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13957   predicate(UseBMI1Instructions);
13958   effect(KILL cr);
13959   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13960 
13961   ins_cost(125);
13962   format %{ "blsmskq $dst, $src" %}
13963 
13964   ins_encode %{
13965     __ blsmskq($dst$$Register, $src$$Address);
13966   %}
13967   ins_pipe(ialu_reg_mem);
13968 %}
13969 
13970 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13971 %{
13972   match(Set dst (XorL (AddL src minus_1) src));
13973   predicate(UseBMI1Instructions);
13974   effect(KILL cr);
13975   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13976 
13977   format %{ "blsmskq $dst, $src" %}
13978 
13979   ins_encode %{
13980     __ blsmskq($dst$$Register, $src$$Register);
13981   %}
13982 
13983   ins_pipe(ialu_reg);
13984 %}
13985 
13986 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13987 %{
13988   match(Set dst (AndL (AddL src minus_1) src) );
13989   predicate(UseBMI1Instructions);
13990   effect(KILL cr);
13991   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13992 
13993   format %{ "blsrq  $dst, $src" %}
13994 
13995   ins_encode %{
13996     __ blsrq($dst$$Register, $src$$Register);
13997   %}
13998 
13999   ins_pipe(ialu_reg);
14000 %}
14001 
14002 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
14003 %{
14004   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
14005   predicate(UseBMI1Instructions);
14006   effect(KILL cr);
14007   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
14008 
14009   ins_cost(125);
14010   format %{ "blsrq  $dst, $src" %}
14011 
14012   ins_encode %{
14013     __ blsrq($dst$$Register, $src$$Address);
14014   %}
14015 
14016   ins_pipe(ialu_reg);
14017 %}
14018 
14019 // Or Instructions
14020 // Or Register with Register
14021 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14022 %{
14023   predicate(!UseAPX);
14024   match(Set dst (OrL dst src));
14025   effect(KILL cr);
14026   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14027 
14028   format %{ "orq     $dst, $src\t# long" %}
14029   ins_encode %{
14030     __ orq($dst$$Register, $src$$Register);
14031   %}
14032   ins_pipe(ialu_reg_reg);
14033 %}
14034 
14035 // Or Register with Register using New Data Destination (NDD)
14036 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14037 %{
14038   predicate(UseAPX);
14039   match(Set dst (OrL src1 src2));
14040   effect(KILL cr);
14041   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14042 
14043   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14044   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
14048   ins_pipe(ialu_reg_reg);
14049 %}
14050 
14051 // Use any_RegP to match R15 (TLS register) without spilling.
14052 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
14053   match(Set dst (OrL dst (CastP2X src)));
14054   effect(KILL cr);
14055   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14056 
14057   format %{ "orq     $dst, $src\t# long" %}
14058   ins_encode %{
14059     __ orq($dst$$Register, $src$$Register);
14060   %}
14061   ins_pipe(ialu_reg_reg);
14062 %}
14063 
14064 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
14065   match(Set dst (OrL src1 (CastP2X src2)));
14066   effect(KILL cr);
14067   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14068 
14069   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14070   ins_encode %{
14071     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14072   %}
14073   ins_pipe(ialu_reg_reg);
14074 %}
14075 
14076 // Or Register with Immediate
14077 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14078 %{
14079   predicate(!UseAPX);
14080   match(Set dst (OrL dst src));
14081   effect(KILL cr);
14082   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14083 
14084   format %{ "orq     $dst, $src\t# long" %}
14085   ins_encode %{
14086     __ orq($dst$$Register, $src$$constant);
14087   %}
14088   ins_pipe(ialu_reg);
14089 %}
14090 
14091 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14092 %{
14093   predicate(UseAPX);
14094   match(Set dst (OrL src1 src2));
14095   effect(KILL cr);
14096   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14097 
14098   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14099   ins_encode %{
14100     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14101   %}
14102   ins_pipe(ialu_reg);
14103 %}
14104 
14105 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14106 %{
14107   predicate(UseAPX);
14108   match(Set dst (OrL src1 src2));
14109   effect(KILL cr);
14110   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14111 
14112   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14113   ins_encode %{
14114     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14115   %}
14116   ins_pipe(ialu_reg);
14117 %}
14118 
// Or Register with Memory and Immediate (NDD)
14120 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14121 %{
14122   predicate(UseAPX);
14123   match(Set dst (OrL (LoadL src1) src2));
14124   effect(KILL cr);
14125   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14126 
14127   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14128   ins_encode %{
14129     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14130   %}
14131   ins_pipe(ialu_reg);
14132 %}
14133 
14134 // Or Register with Memory
14135 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14136 %{
14137   predicate(!UseAPX);
14138   match(Set dst (OrL dst (LoadL src)));
14139   effect(KILL cr);
14140   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14141 
14142   ins_cost(150);
14143   format %{ "orq     $dst, $src\t# long" %}
14144   ins_encode %{
14145     __ orq($dst$$Register, $src$$Address);
14146   %}
14147   ins_pipe(ialu_reg_mem);
14148 %}
14149 
14150 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14151 %{
14152   predicate(UseAPX);
14153   match(Set dst (OrL src1 (LoadL src2)));
14154   effect(KILL cr);
14155   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14156 
14157   ins_cost(150);
14158   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14159   ins_encode %{
14160     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14161   %}
14162   ins_pipe(ialu_reg_mem);
14163 %}
14164 
14165 // Or Memory with Register
14166 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14167 %{
14168   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14169   effect(KILL cr);
14170   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14171 
14172   ins_cost(150);
14173   format %{ "orq     $dst, $src\t# long" %}
14174   ins_encode %{
14175     __ orq($dst$$Address, $src$$Register);
14176   %}
14177   ins_pipe(ialu_mem_reg);
14178 %}
14179 
14180 // Or Memory with Immediate
14181 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14182 %{
14183   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14184   effect(KILL cr);
14185   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14186 
14187   ins_cost(125);
14188   format %{ "orq     $dst, $src\t# long" %}
14189   ins_encode %{
14190     __ orq($dst$$Address, $src$$constant);
14191   %}
14192   ins_pipe(ialu_mem_imm);
14193 %}
14194 
14195 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14196 %{
  // con should be a pure 64-bit power-of-2 immediate,
  // because AND/OR work well enough for 8/32-bit values.
14199   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14200 
14201   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14202   effect(KILL cr);
14203 
14204   ins_cost(125);
14205   format %{ "btsq    $dst, log2($con)\t# long" %}
14206   ins_encode %{
14207     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14208   %}
14209   ins_pipe(ialu_mem_imm);
14210 %}
14211 
14212 // Xor Instructions
14213 // Xor Register with Register
14214 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14215 %{
14216   predicate(!UseAPX);
14217   match(Set dst (XorL dst src));
14218   effect(KILL cr);
14219   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14220 
14221   format %{ "xorq    $dst, $src\t# long" %}
14222   ins_encode %{
14223     __ xorq($dst$$Register, $src$$Register);
14224   %}
14225   ins_pipe(ialu_reg_reg);
14226 %}
14227 
14228 // Xor Register with Register using New Data Destination (NDD)
14229 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14230 %{
14231   predicate(UseAPX);
14232   match(Set dst (XorL src1 src2));
14233   effect(KILL cr);
14234   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14235 
14236   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14237   ins_encode %{
14238     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14239   %}
14240   ins_pipe(ialu_reg_reg);
14241 %}
14242 
14243 // Xor Register with Immediate -1
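// XOR with -1 is a bitwise NOT; the NOT instruction leaves EFLAGS untouched,
// which is why these rules need no KILL cr effect.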
14244 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14245 %{
14246   predicate(!UseAPX);
14247   match(Set dst (XorL dst imm));
14248 
14249   format %{ "notq   $dst" %}
14250   ins_encode %{
14251      __ notq($dst$$Register);
14252   %}
14253   ins_pipe(ialu_reg);
14254 %}
14255 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14257 %{
14258   predicate(UseAPX);
14259   match(Set dst (XorL src imm));
14260   flag(PD::Flag_ndd_demotable_opr1);
14261 
14262   format %{ "enotq   $dst, $src" %}
14263   ins_encode %{
14264     __ enotq($dst$$Register, $src$$Register);
14265   %}
14266   ins_pipe(ialu_reg);
14267 %}
14268 
14269 // Xor Register with Immediate
14270 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14271 %{
  // Strict predicate check so that selection of xorL_rReg_im1 is cost-agnostic when immL32 src is -1.
14273   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14274   match(Set dst (XorL dst src));
14275   effect(KILL cr);
14276   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14277 
14278   format %{ "xorq    $dst, $src\t# long" %}
14279   ins_encode %{
14280     __ xorq($dst$$Register, $src$$constant);
14281   %}
14282   ins_pipe(ialu_reg);
14283 %}
14284 
14285 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14286 %{
  // Strict predicate check so that selection of xorL_rReg_im1_ndd is cost-agnostic when immL32 src2 is -1.
14288   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14289   match(Set dst (XorL src1 src2));
14290   effect(KILL cr);
14291   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14292 
14293   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14294   ins_encode %{
14295     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14296   %}
14297   ins_pipe(ialu_reg);
14298 %}
14299 
// Xor Register with Memory and Immediate (NDD)
14301 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14302 %{
14303   predicate(UseAPX);
14304   match(Set dst (XorL (LoadL src1) src2));
14305   effect(KILL cr);
14306   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14307   ins_cost(150);
14308 
14309   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14310   ins_encode %{
14311     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14312   %}
14313   ins_pipe(ialu_reg);
14314 %}
14315 
14316 // Xor Register with Memory
14317 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14318 %{
14319   predicate(!UseAPX);
14320   match(Set dst (XorL dst (LoadL src)));
14321   effect(KILL cr);
14322   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14323 
14324   ins_cost(150);
14325   format %{ "xorq    $dst, $src\t# long" %}
14326   ins_encode %{
14327     __ xorq($dst$$Register, $src$$Address);
14328   %}
14329   ins_pipe(ialu_reg_mem);
14330 %}
14331 
14332 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14333 %{
14334   predicate(UseAPX);
14335   match(Set dst (XorL src1 (LoadL src2)));
14336   effect(KILL cr);
14337   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14338 
14339   ins_cost(150);
14340   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14341   ins_encode %{
14342     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14343   %}
14344   ins_pipe(ialu_reg_mem);
14345 %}
14346 
14347 // Xor Memory with Register
14348 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14349 %{
14350   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14351   effect(KILL cr);
14352   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14353 
14354   ins_cost(150);
14355   format %{ "xorq    $dst, $src\t# long" %}
14356   ins_encode %{
14357     __ xorq($dst$$Address, $src$$Register);
14358   %}
14359   ins_pipe(ialu_mem_reg);
14360 %}
14361 
14362 // Xor Memory with Immediate
14363 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14364 %{
14365   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14366   effect(KILL cr);
14367   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14368 
14369   ins_cost(125);
14370   format %{ "xorq    $dst, $src\t# long" %}
14371   ins_encode %{
14372     __ xorq($dst$$Address, $src$$constant);
14373   %}
14374   ins_pipe(ialu_mem_imm);
14375 %}
14376 
14377 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14378 %{
14379   match(Set dst (CmpLTMask p q));
14380   effect(KILL cr);
14381 
14382   ins_cost(400);
14383   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14384             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14385             "negl    $dst" %}
14386   ins_encode %{
14387     __ cmpl($p$$Register, $q$$Register);
14388     __ setcc(Assembler::less, $dst$$Register);
14389     __ negl($dst$$Register);
14390   %}
14391   ins_pipe(pipe_slow);
14392 %}
14393 
14394 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14395 %{
14396   match(Set dst (CmpLTMask dst zero));
14397   effect(KILL cr);
14398 
14399   ins_cost(100);
14400   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14401   ins_encode %{
14402     __ sarl($dst$$Register, 31);
14403   %}
14404   ins_pipe(ialu_reg);
14405 %}
14406 
14407 /* Better to save a register than avoid a branch */
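// Computes p = (p - q) + ((p < q) ? y : 0) with a short branch rather than
// materialising the mask in a register.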
14408 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14409 %{
14410   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14411   effect(KILL cr);
14412   ins_cost(300);
14413   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14414             "jge     done\n\t"
14415             "addl    $p,$y\n"
14416             "done:   " %}
14417   ins_encode %{
14418     Register Rp = $p$$Register;
14419     Register Rq = $q$$Register;
14420     Register Ry = $y$$Register;
14421     Label done;
14422     __ subl(Rp, Rq);
14423     __ jccb(Assembler::greaterEqual, done);
14424     __ addl(Rp, Ry);
14425     __ bind(done);
14426   %}
14427   ins_pipe(pipe_cmplt);
14428 %}
14429 
14430 /* Better to save a register than avoid a branch */
14431 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14432 %{
14433   match(Set y (AndI (CmpLTMask p q) y));
14434   effect(KILL cr);
14435 
14436   ins_cost(300);
14437 
14438   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14439             "jlt     done\n\t"
14440             "xorl    $y, $y\n"
14441             "done:   " %}
14442   ins_encode %{
14443     Register Rp = $p$$Register;
14444     Register Rq = $q$$Register;
14445     Register Ry = $y$$Register;
14446     Label done;
14447     __ cmpl(Rp, Rq);
14448     __ jccb(Assembler::less, done);
14449     __ xorl(Ry, Ry);
14450     __ bind(done);
14451   %}
14452   ins_pipe(pipe_cmplt);
14453 %}
14454 
14455 
14456 //---------- FP Instructions------------------------------------------------
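
// ucomiss/ucomisd set ZF, PF and CF when either operand is NaN (unordered).
// The pushfq/andq/popfq fixup emitted by emit_cmpfp_fixup() clears ZF and PF in
// that case, so an unordered compare ends up looking like an ordinary 'below'
// (CF=1) result.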
14457 
14458 // Really expensive, avoid
14459 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14460 %{
14461   match(Set cr (CmpF src1 src2));
14462 
14463   ins_cost(500);
14464   format %{ "ucomiss $src1, $src2\n\t"
14465             "jnp,s   exit\n\t"
14466             "pushfq\t# saw NaN, set CF\n\t"
14467             "andq    [rsp], #0xffffff2b\n\t"
14468             "popfq\n"
14469     "exit:" %}
14470   ins_encode %{
14471     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14472     emit_cmpfp_fixup(masm);
14473   %}
14474   ins_pipe(pipe_slow);
14475 %}
14476 
14477 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14478   match(Set cr (CmpF src1 src2));
14479 
14480   ins_cost(100);
14481   format %{ "ucomiss $src1, $src2" %}
14482   ins_encode %{
14483     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14484   %}
14485   ins_pipe(pipe_slow);
14486 %}
14487 
14488 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14489   match(Set cr (CmpF src1 src2));
14490 
14491   ins_cost(100);
14492   format %{ "evucomxss $src1, $src2" %}
14493   ins_encode %{
14494     __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14495   %}
14496   ins_pipe(pipe_slow);
14497 %}
14498 
14499 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14500   match(Set cr (CmpF src1 (LoadF src2)));
14501 
14502   ins_cost(100);
14503   format %{ "ucomiss $src1, $src2" %}
14504   ins_encode %{
14505     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14506   %}
14507   ins_pipe(pipe_slow);
14508 %}
14509 
14510 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14511   match(Set cr (CmpF src1 (LoadF src2)));
14512 
14513   ins_cost(100);
14514   format %{ "evucomxss $src1, $src2" %}
14515   ins_encode %{
14516     __ evucomxss($src1$$XMMRegister, $src2$$Address);
14517   %}
14518   ins_pipe(pipe_slow);
14519 %}
14520 
14521 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14522   match(Set cr (CmpF src con));
14523 
14524   ins_cost(100);
14525   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14526   ins_encode %{
14527     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14528   %}
14529   ins_pipe(pipe_slow);
14530 %}
14531 
14532 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14533   match(Set cr (CmpF src con));
14534 
14535   ins_cost(100);
14536   format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14537   ins_encode %{
14538     __ evucomxss($src$$XMMRegister, $constantaddress($con));
14539   %}
14540   ins_pipe(pipe_slow);
14541 %}
14542 
14543 // Really expensive, avoid
14544 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14545 %{
14546   match(Set cr (CmpD src1 src2));
14547 
14548   ins_cost(500);
14549   format %{ "ucomisd $src1, $src2\n\t"
14550             "jnp,s   exit\n\t"
14551             "pushfq\t# saw NaN, set CF\n\t"
14552             "andq    [rsp], #0xffffff2b\n\t"
14553             "popfq\n"
14554     "exit:" %}
14555   ins_encode %{
14556     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14557     emit_cmpfp_fixup(masm);
14558   %}
14559   ins_pipe(pipe_slow);
14560 %}
14561 
14562 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14563   match(Set cr (CmpD src1 src2));
14564 
14565   ins_cost(100);
14566   format %{ "ucomisd $src1, $src2 test" %}
14567   ins_encode %{
14568     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14569   %}
14570   ins_pipe(pipe_slow);
14571 %}
14572 
14573 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14574   match(Set cr (CmpD src1 src2));
14575 
14576   ins_cost(100);
14577   format %{ "evucomxsd $src1, $src2 test" %}
14578   ins_encode %{
14579     __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14580   %}
14581   ins_pipe(pipe_slow);
14582 %}
14583 
14584 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14585   match(Set cr (CmpD src1 (LoadD src2)));
14586 
14587   ins_cost(100);
14588   format %{ "ucomisd $src1, $src2" %}
14589   ins_encode %{
14590     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14591   %}
14592   ins_pipe(pipe_slow);
14593 %}
14594 
14595 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14596   match(Set cr (CmpD src1 (LoadD src2)));
14597 
14598   ins_cost(100);
14599   format %{ "evucomxsd $src1, $src2" %}
14600   ins_encode %{
14601     __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14602   %}
14603   ins_pipe(pipe_slow);
14604 %}
14605 
14606 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14607   match(Set cr (CmpD src con));
14608   ins_cost(100);
14609   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14610   ins_encode %{
14611     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14612   %}
14613   ins_pipe(pipe_slow);
14614 %}
14615 
14616 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14617   match(Set cr (CmpD src con));
14618 
14619   ins_cost(100);
14620   format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14621   ins_encode %{
14622     __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14623   %}
14624   ins_pipe(pipe_slow);
14625 %}
14626 
14627 // Compare into -1,0,1
14628 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14629 %{
14630   match(Set dst (CmpF3 src1 src2));
14631   effect(KILL cr);
14632 
14633   ins_cost(275);
14634   format %{ "ucomiss $src1, $src2\n\t"
14635             "movl    $dst, #-1\n\t"
14636             "jp,s    done\n\t"
14637             "jb,s    done\n\t"
14638             "setne   $dst\n\t"
14639             "movzbl  $dst, $dst\n"
14640     "done:" %}
14641   ins_encode %{
14642     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14643     emit_cmpfp3(masm, $dst$$Register);
14644   %}
14645   ins_pipe(pipe_slow);
14646 %}
14647 
14648 // Compare into -1,0,1
14649 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14650 %{
14651   match(Set dst (CmpF3 src1 (LoadF src2)));
14652   effect(KILL cr);
14653 
14654   ins_cost(275);
14655   format %{ "ucomiss $src1, $src2\n\t"
14656             "movl    $dst, #-1\n\t"
14657             "jp,s    done\n\t"
14658             "jb,s    done\n\t"
14659             "setne   $dst\n\t"
14660             "movzbl  $dst, $dst\n"
14661     "done:" %}
14662   ins_encode %{
14663     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14664     emit_cmpfp3(masm, $dst$$Register);
14665   %}
14666   ins_pipe(pipe_slow);
14667 %}
14668 
14669 // Compare into -1,0,1
14670 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14671   match(Set dst (CmpF3 src con));
14672   effect(KILL cr);
14673 
14674   ins_cost(275);
14675   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14676             "movl    $dst, #-1\n\t"
14677             "jp,s    done\n\t"
14678             "jb,s    done\n\t"
14679             "setne   $dst\n\t"
14680             "movzbl  $dst, $dst\n"
14681     "done:" %}
14682   ins_encode %{
14683     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14684     emit_cmpfp3(masm, $dst$$Register);
14685   %}
14686   ins_pipe(pipe_slow);
14687 %}
14688 
14689 // Compare into -1,0,1
14690 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14691 %{
14692   match(Set dst (CmpD3 src1 src2));
14693   effect(KILL cr);
14694 
14695   ins_cost(275);
14696   format %{ "ucomisd $src1, $src2\n\t"
14697             "movl    $dst, #-1\n\t"
14698             "jp,s    done\n\t"
14699             "jb,s    done\n\t"
14700             "setne   $dst\n\t"
14701             "movzbl  $dst, $dst\n"
14702     "done:" %}
14703   ins_encode %{
14704     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14705     emit_cmpfp3(masm, $dst$$Register);
14706   %}
14707   ins_pipe(pipe_slow);
14708 %}
14709 
14710 // Compare into -1,0,1
14711 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14712 %{
14713   match(Set dst (CmpD3 src1 (LoadD src2)));
14714   effect(KILL cr);
14715 
14716   ins_cost(275);
14717   format %{ "ucomisd $src1, $src2\n\t"
14718             "movl    $dst, #-1\n\t"
14719             "jp,s    done\n\t"
14720             "jb,s    done\n\t"
14721             "setne   $dst\n\t"
14722             "movzbl  $dst, $dst\n"
14723     "done:" %}
14724   ins_encode %{
14725     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14726     emit_cmpfp3(masm, $dst$$Register);
14727   %}
14728   ins_pipe(pipe_slow);
14729 %}
14730 
14731 // Compare into -1,0,1
14732 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14733   match(Set dst (CmpD3 src con));
14734   effect(KILL cr);
14735 
14736   ins_cost(275);
14737   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14738             "movl    $dst, #-1\n\t"
14739             "jp,s    done\n\t"
14740             "jb,s    done\n\t"
14741             "setne   $dst\n\t"
14742             "movzbl  $dst, $dst\n"
14743     "done:" %}
14744   ins_encode %{
14745     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14746     emit_cmpfp3(masm, $dst$$Register);
14747   %}
14748   ins_pipe(pipe_slow);
14749 %}
14750 
14751 //----------Arithmetic Conversion Instructions---------------------------------
14752 
14753 instruct convF2D_reg_reg(regD dst, regF src)
14754 %{
14755   match(Set dst (ConvF2D src));
14756 
14757   format %{ "cvtss2sd $dst, $src" %}
14758   ins_encode %{
14759     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14760   %}
14761   ins_pipe(pipe_slow); // XXX
14762 %}
14763 
14764 instruct convF2D_reg_mem(regD dst, memory src)
14765 %{
14766   predicate(UseAVX == 0);
14767   match(Set dst (ConvF2D (LoadF src)));
14768 
14769   format %{ "cvtss2sd $dst, $src" %}
14770   ins_encode %{
14771     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14772   %}
14773   ins_pipe(pipe_slow); // XXX
14774 %}
14775 
14776 instruct convD2F_reg_reg(regF dst, regD src)
14777 %{
14778   match(Set dst (ConvD2F src));
14779 
14780   format %{ "cvtsd2ss $dst, $src" %}
14781   ins_encode %{
14782     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14783   %}
14784   ins_pipe(pipe_slow); // XXX
14785 %}
14786 
14787 instruct convD2F_reg_mem(regF dst, memory src)
14788 %{
14789   predicate(UseAVX == 0);
14790   match(Set dst (ConvD2F (LoadD src)));
14791 
14792   format %{ "cvtsd2ss $dst, $src" %}
14793   ins_encode %{
14794     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14795   %}
14796   ins_pipe(pipe_slow); // XXX
14797 %}
14798 
14799 // XXX do mem variants
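// The non-AVX10.2 forms go through convertF2I(), which handles the NaN and
// out-of-range cases required by Java semantics (hence the KILL cr).  The
// AVX10.2 forms use the saturating evcvtt*2sis* encodings and need no fixup
// or flag kill.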
14800 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14801 %{
14802   predicate(!VM_Version::supports_avx10_2());
14803   match(Set dst (ConvF2I src));
14804   effect(KILL cr);
14805   format %{ "convert_f2i $dst, $src" %}
14806   ins_encode %{
14807     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14808   %}
14809   ins_pipe(pipe_slow);
14810 %}
14811 
14812 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14813 %{
14814   predicate(VM_Version::supports_avx10_2());
14815   match(Set dst (ConvF2I src));
14816   format %{ "evcvttss2sisl $dst, $src" %}
14817   ins_encode %{
14818     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14819   %}
14820   ins_pipe(pipe_slow);
14821 %}
14822 
14823 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14824 %{
14825   predicate(VM_Version::supports_avx10_2());
14826   match(Set dst (ConvF2I (LoadF src)));
14827   format %{ "evcvttss2sisl $dst, $src" %}
14828   ins_encode %{
14829     __ evcvttss2sisl($dst$$Register, $src$$Address);
14830   %}
14831   ins_pipe(pipe_slow);
14832 %}
14833 
14834 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14835 %{
14836   predicate(!VM_Version::supports_avx10_2());
14837   match(Set dst (ConvF2L src));
14838   effect(KILL cr);
14839   format %{ "convert_f2l $dst, $src"%}
14840   ins_encode %{
14841     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14842   %}
14843   ins_pipe(pipe_slow);
14844 %}
14845 
14846 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14847 %{
14848   predicate(VM_Version::supports_avx10_2());
14849   match(Set dst (ConvF2L src));
14850   format %{ "evcvttss2sisq $dst, $src" %}
14851   ins_encode %{
14852     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14853   %}
14854   ins_pipe(pipe_slow);
14855 %}
14856 
14857 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14858 %{
14859   predicate(VM_Version::supports_avx10_2());
14860   match(Set dst (ConvF2L (LoadF src)));
14861   format %{ "evcvttss2sisq $dst, $src" %}
14862   ins_encode %{
14863     __ evcvttss2sisq($dst$$Register, $src$$Address);
14864   %}
14865   ins_pipe(pipe_slow);
14866 %}
14867 
14868 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14869 %{
14870   predicate(!VM_Version::supports_avx10_2());
14871   match(Set dst (ConvD2I src));
14872   effect(KILL cr);
14873   format %{ "convert_d2i $dst, $src"%}
14874   ins_encode %{
14875     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14876   %}
14877   ins_pipe(pipe_slow);
14878 %}
14879 
14880 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14881 %{
14882   predicate(VM_Version::supports_avx10_2());
14883   match(Set dst (ConvD2I src));
14884   format %{ "evcvttsd2sisl $dst, $src" %}
14885   ins_encode %{
14886     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14887   %}
14888   ins_pipe(pipe_slow);
14889 %}
14890 
14891 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14892 %{
14893   predicate(VM_Version::supports_avx10_2());
14894   match(Set dst (ConvD2I (LoadD src)));
14895   format %{ "evcvttsd2sisl $dst, $src" %}
14896   ins_encode %{
14897     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14898   %}
14899   ins_pipe(pipe_slow);
14900 %}
14901 
14902 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14903 %{
14904   predicate(!VM_Version::supports_avx10_2());
14905   match(Set dst (ConvD2L src));
14906   effect(KILL cr);
14907   format %{ "convert_d2l $dst, $src"%}
14908   ins_encode %{
14909     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14910   %}
14911   ins_pipe(pipe_slow);
14912 %}
14913 
14914 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14915 %{
14916   predicate(VM_Version::supports_avx10_2());
14917   match(Set dst (ConvD2L src));
14918   format %{ "evcvttsd2sisq $dst, $src" %}
14919   ins_encode %{
14920     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14921   %}
14922   ins_pipe(pipe_slow);
14923 %}
14924 
14925 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14926 %{
14927   predicate(VM_Version::supports_avx10_2());
14928   match(Set dst (ConvD2L (LoadD src)));
14929   format %{ "evcvttsd2sisq $dst, $src" %}
14930   ins_encode %{
14931     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14932   %}
14933   ins_pipe(pipe_slow);
14934 %}
14935 
14936 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14937 %{
14938   match(Set dst (RoundD src));
14939   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14940   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14941   ins_encode %{
14942     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14943   %}
14944   ins_pipe(pipe_slow);
14945 %}
14946 
14947 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14948 %{
14949   match(Set dst (RoundF src));
14950   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14951   format %{ "round_float $dst,$src" %}
14952   ins_encode %{
14953     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14954   %}
14955   ins_pipe(pipe_slow);
14956 %}
14957 
14958 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14959 %{
14960   predicate(!UseXmmI2F);
14961   match(Set dst (ConvI2F src));
14962 
14963   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14964   ins_encode %{
14965     if (UseAVX > 0) {
14966       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14967     }
14968     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14969   %}
14970   ins_pipe(pipe_slow); // XXX
14971 %}
14972 
14973 instruct convI2F_reg_mem(regF dst, memory src)
14974 %{
14975   predicate(UseAVX == 0);
14976   match(Set dst (ConvI2F (LoadI src)));
14977 
14978   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14979   ins_encode %{
14980     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14981   %}
14982   ins_pipe(pipe_slow); // XXX
14983 %}
14984 
14985 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14986 %{
14987   predicate(!UseXmmI2D);
14988   match(Set dst (ConvI2D src));
14989 
14990   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14991   ins_encode %{
14992     if (UseAVX > 0) {
14993       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14994     }
14995     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14996   %}
14997   ins_pipe(pipe_slow); // XXX
14998 %}
14999 
15000 instruct convI2D_reg_mem(regD dst, memory src)
15001 %{
15002   predicate(UseAVX == 0);
15003   match(Set dst (ConvI2D (LoadI src)));
15004 
15005   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
15006   ins_encode %{
15007     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
15008   %}
15009   ins_pipe(pipe_slow); // XXX
15010 %}
15011 
15012 instruct convXI2F_reg(regF dst, rRegI src)
15013 %{
15014   predicate(UseXmmI2F);
15015   match(Set dst (ConvI2F src));
15016 
15017   format %{ "movdl $dst, $src\n\t"
15018             "cvtdq2psl $dst, $dst\t# i2f" %}
15019   ins_encode %{
15020     __ movdl($dst$$XMMRegister, $src$$Register);
15021     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
15022   %}
15023   ins_pipe(pipe_slow); // XXX
15024 %}
15025 
15026 instruct convXI2D_reg(regD dst, rRegI src)
15027 %{
15028   predicate(UseXmmI2D);
15029   match(Set dst (ConvI2D src));
15030 
15031   format %{ "movdl $dst, $src\n\t"
15032             "cvtdq2pdl $dst, $dst\t# i2d" %}
15033   ins_encode %{
15034     __ movdl($dst$$XMMRegister, $src$$Register);
15035     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
15036   %}
15037   ins_pipe(pipe_slow); // XXX
15038 %}
15039 
15040 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
15041 %{
15042   match(Set dst (ConvL2F src));
15043 
15044   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15045   ins_encode %{
15046     if (UseAVX > 0) {
15047       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15048     }
15049     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
15050   %}
15051   ins_pipe(pipe_slow); // XXX
15052 %}
15053 
15054 instruct convL2F_reg_mem(regF dst, memory src)
15055 %{
15056   predicate(UseAVX == 0);
15057   match(Set dst (ConvL2F (LoadL src)));
15058 
15059   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15060   ins_encode %{
15061     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
15062   %}
15063   ins_pipe(pipe_slow); // XXX
15064 %}
15065 
15066 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
15067 %{
15068   match(Set dst (ConvL2D src));
15069 
15070   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15071   ins_encode %{
15072     if (UseAVX > 0) {
15073       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15074     }
15075     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
15076   %}
15077   ins_pipe(pipe_slow); // XXX
15078 %}
15079 
15080 instruct convL2D_reg_mem(regD dst, memory src)
15081 %{
15082   predicate(UseAVX == 0);
15083   match(Set dst (ConvL2D (LoadL src)));
15084 
15085   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15086   ins_encode %{
15087     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15088   %}
15089   ins_pipe(pipe_slow); // XXX
15090 %}
15091 
15092 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15093 %{
15094   match(Set dst (ConvI2L src));
15095 
15096   ins_cost(125);
15097   format %{ "movslq  $dst, $src\t# i2l" %}
15098   ins_encode %{
15099     __ movslq($dst$$Register, $src$$Register);
15100   %}
15101   ins_pipe(ialu_reg_reg);
15102 %}
15103 
15104 // Zero-extend convert int to long
15105 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15106 %{
15107   match(Set dst (AndL (ConvI2L src) mask));
15108 
15109   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15110   ins_encode %{
15111     if ($dst$$reg != $src$$reg) {
15112       __ movl($dst$$Register, $src$$Register);
15113     }
15114   %}
15115   ins_pipe(ialu_reg_reg);
15116 %}
15117 
15118 // Zero-extend convert int to long
15119 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15120 %{
15121   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15122 
15123   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15124   ins_encode %{
15125     __ movl($dst$$Register, $src$$Address);
15126   %}
15127   ins_pipe(ialu_reg_mem);
15128 %}
15129 
15130 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15131 %{
15132   match(Set dst (AndL src mask));
15133 
15134   format %{ "movl    $dst, $src\t# zero-extend long" %}
15135   ins_encode %{
15136     __ movl($dst$$Register, $src$$Register);
15137   %}
15138   ins_pipe(ialu_reg_reg);
15139 %}
15140 
15141 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15142 %{
15143   match(Set dst (ConvL2I src));
15144 
15145   format %{ "movl    $dst, $src\t# l2i" %}
15146   ins_encode %{
15147     __ movl($dst$$Register, $src$$Register);
15148   %}
15149   ins_pipe(ialu_reg_reg);
15150 %}
15151 
15152 
15153 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15154   match(Set dst (MoveF2I src));
15155   effect(DEF dst, USE src);
15156 
15157   ins_cost(125);
15158   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15159   ins_encode %{
15160     __ movl($dst$$Register, Address(rsp, $src$$disp));
15161   %}
15162   ins_pipe(ialu_reg_mem);
15163 %}
15164 
15165 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15166   match(Set dst (MoveI2F src));
15167   effect(DEF dst, USE src);
15168 
15169   ins_cost(125);
15170   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15171   ins_encode %{
15172     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15173   %}
15174   ins_pipe(pipe_slow);
15175 %}
15176 
15177 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15178   match(Set dst (MoveD2L src));
15179   effect(DEF dst, USE src);
15180 
15181   ins_cost(125);
15182   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15183   ins_encode %{
15184     __ movq($dst$$Register, Address(rsp, $src$$disp));
15185   %}
15186   ins_pipe(ialu_reg_mem);
15187 %}
15188 
15189 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15190   predicate(!UseXmmLoadAndClearUpper);
15191   match(Set dst (MoveL2D src));
15192   effect(DEF dst, USE src);
15193 
15194   ins_cost(125);
15195   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15196   ins_encode %{
15197     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15198   %}
15199   ins_pipe(pipe_slow);
15200 %}
15201 
15202 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15203   predicate(UseXmmLoadAndClearUpper);
15204   match(Set dst (MoveL2D src));
15205   effect(DEF dst, USE src);
15206 
15207   ins_cost(125);
15208   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15209   ins_encode %{
15210     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15211   %}
15212   ins_pipe(pipe_slow);
15213 %}
15214 
15215 
15216 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15217   match(Set dst (MoveF2I src));
15218   effect(DEF dst, USE src);
15219 
15220   ins_cost(95); // XXX
15221   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15222   ins_encode %{
15223     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15224   %}
15225   ins_pipe(pipe_slow);
15226 %}
15227 
15228 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15229   match(Set dst (MoveI2F src));
15230   effect(DEF dst, USE src);
15231 
15232   ins_cost(100);
15233   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15234   ins_encode %{
15235     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15236   %}
15237   ins_pipe( ialu_mem_reg );
15238 %}
15239 
15240 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15241   match(Set dst (MoveD2L src));
15242   effect(DEF dst, USE src);
15243 
15244   ins_cost(95); // XXX
15245   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
15246   ins_encode %{
15247     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15248   %}
15249   ins_pipe(pipe_slow);
15250 %}
15251 
15252 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15253   match(Set dst (MoveL2D src));
15254   effect(DEF dst, USE src);
15255 
15256   ins_cost(100);
15257   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15258   ins_encode %{
15259     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15260   %}
15261   ins_pipe(ialu_mem_reg);
15262 %}
15263 
15264 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15265   match(Set dst (MoveF2I src));
15266   effect(DEF dst, USE src);
15267   ins_cost(85);
15268   format %{ "movd    $dst,$src\t# MoveF2I" %}
15269   ins_encode %{
15270     __ movdl($dst$$Register, $src$$XMMRegister);
15271   %}
15272   ins_pipe( pipe_slow );
15273 %}
15274 
15275 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15276   match(Set dst (MoveD2L src));
15277   effect(DEF dst, USE src);
15278   ins_cost(85);
15279   format %{ "movd    $dst,$src\t# MoveD2L" %}
15280   ins_encode %{
15281     __ movdq($dst$$Register, $src$$XMMRegister);
15282   %}
15283   ins_pipe( pipe_slow );
15284 %}
15285 
15286 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15287   match(Set dst (MoveI2F src));
15288   effect(DEF dst, USE src);
15289   ins_cost(100);
15290   format %{ "movd    $dst,$src\t# MoveI2F" %}
15291   ins_encode %{
15292     __ movdl($dst$$XMMRegister, $src$$Register);
15293   %}
15294   ins_pipe( pipe_slow );
15295 %}
15296 
15297 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15298   match(Set dst (MoveL2D src));
15299   effect(DEF dst, USE src);
15300   ins_cost(100);
15301   format %{ "movd    $dst,$src\t# MoveL2D" %}
15302   ins_encode %{
15303      __ movdq($dst$$XMMRegister, $src$$Register);
15304   %}
15305   ins_pipe( pipe_slow );
15306 %}
15307 
15308 
15309 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
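// The rep_stos* variants below differ only in whether the AVX-512 form (which
// needs a kReg temp) is selected and whether the node is restricted to word-sized
// stores (word_copy_only(), which rules out the byte-granular rep stosb path).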
15311 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15312                   Universe dummy, rFlagsReg cr)
15313 %{
15314   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15315   match(Set dummy (ClearArray (Binary cnt base) val));
15316   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15317 
15318   format %{ $$template
15319     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15320     $$emit$$"jg      LARGE\n\t"
15321     $$emit$$"dec     rcx\n\t"
15322     $$emit$$"js      DONE\t# Zero length\n\t"
15323     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15324     $$emit$$"dec     rcx\n\t"
15325     $$emit$$"jge     LOOP\n\t"
15326     $$emit$$"jmp     DONE\n\t"
15327     $$emit$$"# LARGE:\n\t"
15328     if (UseFastStosb) {
15329        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15330        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15331     } else if (UseXMMForObjInit) {
15332        $$emit$$"movdq   $tmp, $val\n\t"
15333        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15334        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15335        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15336        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15337        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15338        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15339        $$emit$$"add     0x40,rax\n\t"
15340        $$emit$$"# L_zero_64_bytes:\n\t"
15341        $$emit$$"sub     0x8,rcx\n\t"
15342        $$emit$$"jge     L_loop\n\t"
15343        $$emit$$"add     0x4,rcx\n\t"
15344        $$emit$$"jl      L_tail\n\t"
15345        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15346        $$emit$$"add     0x20,rax\n\t"
15347        $$emit$$"sub     0x4,rcx\n\t"
15348        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15349        $$emit$$"add     0x4,rcx\n\t"
15350        $$emit$$"jle     L_end\n\t"
15351        $$emit$$"dec     rcx\n\t"
15352        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15353        $$emit$$"vmovq   xmm0,(rax)\n\t"
15354        $$emit$$"add     0x8,rax\n\t"
15355        $$emit$$"dec     rcx\n\t"
15356        $$emit$$"jge     L_sloop\n\t"
15357        $$emit$$"# L_end:\n\t"
15358     } else {
15359        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15360     }
15361     $$emit$$"# DONE"
15362   %}
15363   ins_encode %{
15364     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15365                  $tmp$$XMMRegister, false, false);
15366   %}
15367   ins_pipe(pipe_slow);
15368 %}
15369 
15370 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15371                             Universe dummy, rFlagsReg cr)
15372 %{
15373   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15374   match(Set dummy (ClearArray (Binary cnt base) val));
15375   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15376 
15377   format %{ $$template
15378     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15379     $$emit$$"jg      LARGE\n\t"
15380     $$emit$$"dec     rcx\n\t"
15381     $$emit$$"js      DONE\t# Zero length\n\t"
15382     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15383     $$emit$$"dec     rcx\n\t"
15384     $$emit$$"jge     LOOP\n\t"
15385     $$emit$$"jmp     DONE\n\t"
15386     $$emit$$"# LARGE:\n\t"
15387     if (UseXMMForObjInit) {
15388        $$emit$$"movdq   $tmp, $val\n\t"
15389        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15390        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15391        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15392        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15393        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15394        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15395        $$emit$$"add     0x40,rax\n\t"
15396        $$emit$$"# L_zero_64_bytes:\n\t"
15397        $$emit$$"sub     0x8,rcx\n\t"
15398        $$emit$$"jge     L_loop\n\t"
15399        $$emit$$"add     0x4,rcx\n\t"
15400        $$emit$$"jl      L_tail\n\t"
15401        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15402        $$emit$$"add     0x20,rax\n\t"
15403        $$emit$$"sub     0x4,rcx\n\t"
15404        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15405        $$emit$$"add     0x4,rcx\n\t"
15406        $$emit$$"jle     L_end\n\t"
15407        $$emit$$"dec     rcx\n\t"
15408        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15409        $$emit$$"vmovq   xmm0,(rax)\n\t"
15410        $$emit$$"add     0x8,rax\n\t"
15411        $$emit$$"dec     rcx\n\t"
15412        $$emit$$"jge     L_sloop\n\t"
15413        $$emit$$"# L_end:\n\t"
15414     } else {
15415        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15416     }
15417     $$emit$$"# DONE"
15418   %}
15419   ins_encode %{
15420     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15421                  $tmp$$XMMRegister, false, true);
15422   %}
15423   ins_pipe(pipe_slow);
15424 %}
15425 
15426 // Small non-constant length ClearArray for AVX512 targets.
15427 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15428                        Universe dummy, rFlagsReg cr)
15429 %{
15430   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15431   match(Set dummy (ClearArray (Binary cnt base) val));
15432   ins_cost(125);
15433   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15434 
15435   format %{ $$template
15436     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15437     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15438     $$emit$$"jg      LARGE\n\t"
15439     $$emit$$"dec     rcx\n\t"
15440     $$emit$$"js      DONE\t# Zero length\n\t"
15441     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15442     $$emit$$"dec     rcx\n\t"
15443     $$emit$$"jge     LOOP\n\t"
15444     $$emit$$"jmp     DONE\n\t"
15445     $$emit$$"# LARGE:\n\t"
15446     if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15448        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15449     } else if (UseXMMForObjInit) {
15450        $$emit$$"mov     rdi,rax\n\t"
15451        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15452        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15453        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15454        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15455        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15456        $$emit$$"add     0x40,rax\n\t"
15457        $$emit$$"# L_zero_64_bytes:\n\t"
15458        $$emit$$"sub     0x8,rcx\n\t"
15459        $$emit$$"jge     L_loop\n\t"
15460        $$emit$$"add     0x4,rcx\n\t"
15461        $$emit$$"jl      L_tail\n\t"
15462        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15463        $$emit$$"add     0x20,rax\n\t"
15464        $$emit$$"sub     0x4,rcx\n\t"
15465        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15466        $$emit$$"add     0x4,rcx\n\t"
15467        $$emit$$"jle     L_end\n\t"
15468        $$emit$$"dec     rcx\n\t"
15469        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15470        $$emit$$"vmovq   xmm0,(rax)\n\t"
15471        $$emit$$"add     0x8,rax\n\t"
15472        $$emit$$"dec     rcx\n\t"
15473        $$emit$$"jge     L_sloop\n\t"
15474        $$emit$$"# L_end:\n\t"
15475     } else {
15476        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15477     }
15478     $$emit$$"# DONE"
15479   %}
15480   ins_encode %{
15481     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15482                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15483   %}
15484   ins_pipe(pipe_slow);
15485 %}
15486 
15487 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15488                                  Universe dummy, rFlagsReg cr)
15489 %{
15490   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15491   match(Set dummy (ClearArray (Binary cnt base) val));
15492   ins_cost(125);
15493   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15494 
15495   format %{ $$template
15496     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15497     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15498     $$emit$$"jg      LARGE\n\t"
15499     $$emit$$"dec     rcx\n\t"
15500     $$emit$$"js      DONE\t# Zero length\n\t"
15501     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15502     $$emit$$"dec     rcx\n\t"
15503     $$emit$$"jge     LOOP\n\t"
15504     $$emit$$"jmp     DONE\n\t"
15505     $$emit$$"# LARGE:\n\t"
15506     if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15508        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15509     } else if (UseXMMForObjInit) {
15510        $$emit$$"mov     rdi,rax\n\t"
15511        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15512        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15513        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15514        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15515        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15516        $$emit$$"add     0x40,rax\n\t"
15517        $$emit$$"# L_zero_64_bytes:\n\t"
15518        $$emit$$"sub     0x8,rcx\n\t"
15519        $$emit$$"jge     L_loop\n\t"
15520        $$emit$$"add     0x4,rcx\n\t"
15521        $$emit$$"jl      L_tail\n\t"
15522        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15523        $$emit$$"add     0x20,rax\n\t"
15524        $$emit$$"sub     0x4,rcx\n\t"
15525        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15526        $$emit$$"add     0x4,rcx\n\t"
15527        $$emit$$"jle     L_end\n\t"
15528        $$emit$$"dec     rcx\n\t"
15529        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15530        $$emit$$"vmovq   xmm0,(rax)\n\t"
15531        $$emit$$"add     0x8,rax\n\t"
15532        $$emit$$"dec     rcx\n\t"
15533        $$emit$$"jge     L_sloop\n\t"
15534        $$emit$$"# L_end:\n\t"
15535     } else {
15536        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15537     }
15538     $$emit$$"# DONE"
15539   %}
15540   ins_encode %{
15541     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15542                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15543   %}
15544   ins_pipe(pipe_slow);
15545 %}
15546 
15547 // Large non-constant length ClearArray for non-AVX512 targets.
15548 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15549                         Universe dummy, rFlagsReg cr)
15550 %{
15551   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15552   match(Set dummy (ClearArray (Binary cnt base) val));
15553   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15554 
15555   format %{ $$template
15556     if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15558        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15559     } else if (UseXMMForObjInit) {
15560        $$emit$$"movdq   $tmp, $val\n\t"
15561        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15562        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15563        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15564        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15565        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15566        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15567        $$emit$$"add     0x40,rax\n\t"
15568        $$emit$$"# L_zero_64_bytes:\n\t"
15569        $$emit$$"sub     0x8,rcx\n\t"
15570        $$emit$$"jge     L_loop\n\t"
15571        $$emit$$"add     0x4,rcx\n\t"
15572        $$emit$$"jl      L_tail\n\t"
15573        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15574        $$emit$$"add     0x20,rax\n\t"
15575        $$emit$$"sub     0x4,rcx\n\t"
15576        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15577        $$emit$$"add     0x4,rcx\n\t"
15578        $$emit$$"jle     L_end\n\t"
15579        $$emit$$"dec     rcx\n\t"
15580        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15581        $$emit$$"vmovq   xmm0,(rax)\n\t"
15582        $$emit$$"add     0x8,rax\n\t"
15583        $$emit$$"dec     rcx\n\t"
15584        $$emit$$"jge     L_sloop\n\t"
15585        $$emit$$"# L_end:\n\t"
15586     } else {
15587        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15588     }
15589   %}
15590   ins_encode %{
15591     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15592                  $tmp$$XMMRegister, true, false);
15593   %}
15594   ins_pipe(pipe_slow);
15595 %}
15596 
15597 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15598                                   Universe dummy, rFlagsReg cr)
15599 %{
15600   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15601   match(Set dummy (ClearArray (Binary cnt base) val));
15602   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15603 
15604   format %{ $$template
15605     if (UseXMMForObjInit) {
15606        $$emit$$"movdq   $tmp, $val\n\t"
15607        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15608        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15609        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15610        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15611        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15612        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15613        $$emit$$"add     0x40,rax\n\t"
15614        $$emit$$"# L_zero_64_bytes:\n\t"
15615        $$emit$$"sub     0x8,rcx\n\t"
15616        $$emit$$"jge     L_loop\n\t"
15617        $$emit$$"add     0x4,rcx\n\t"
15618        $$emit$$"jl      L_tail\n\t"
15619        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15620        $$emit$$"add     0x20,rax\n\t"
15621        $$emit$$"sub     0x4,rcx\n\t"
15622        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15623        $$emit$$"add     0x4,rcx\n\t"
15624        $$emit$$"jle     L_end\n\t"
15625        $$emit$$"dec     rcx\n\t"
15626        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15627        $$emit$$"vmovq   xmm0,(rax)\n\t"
15628        $$emit$$"add     0x8,rax\n\t"
15629        $$emit$$"dec     rcx\n\t"
15630        $$emit$$"jge     L_sloop\n\t"
15631        $$emit$$"# L_end:\n\t"
15632     } else {
15633        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15634     }
15635   %}
15636   ins_encode %{
15637     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15638                  $tmp$$XMMRegister, true, true);
15639   %}
15640   ins_pipe(pipe_slow);
15641 %}
15642 
15643 // Large non-constant length ClearArray for AVX512 targets.
15644 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15645                              Universe dummy, rFlagsReg cr)
15646 %{
15647   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15648   match(Set dummy (ClearArray (Binary cnt base) val));
15649   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15650 
15651   format %{ $$template
15652     if (UseFastStosb) {
15653        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15655        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15656     } else if (UseXMMForObjInit) {
15657        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15658        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15659        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15660        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15661        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15662        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15663        $$emit$$"add     0x40,rax\n\t"
15664        $$emit$$"# L_zero_64_bytes:\n\t"
15665        $$emit$$"sub     0x8,rcx\n\t"
15666        $$emit$$"jge     L_loop\n\t"
15667        $$emit$$"add     0x4,rcx\n\t"
15668        $$emit$$"jl      L_tail\n\t"
15669        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15670        $$emit$$"add     0x20,rax\n\t"
15671        $$emit$$"sub     0x4,rcx\n\t"
15672        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15673        $$emit$$"add     0x4,rcx\n\t"
15674        $$emit$$"jle     L_end\n\t"
15675        $$emit$$"dec     rcx\n\t"
15676        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15677        $$emit$$"vmovq   xmm0,(rax)\n\t"
15678        $$emit$$"add     0x8,rax\n\t"
15679        $$emit$$"dec     rcx\n\t"
15680        $$emit$$"jge     L_sloop\n\t"
15681        $$emit$$"# L_end:\n\t"
15682     } else {
15683        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15684        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15685     }
15686   %}
15687   ins_encode %{
15688     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15689                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15690   %}
15691   ins_pipe(pipe_slow);
15692 %}
15693 
15694 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15695                                        Universe dummy, rFlagsReg cr)
15696 %{
15697   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15698   match(Set dummy (ClearArray (Binary cnt base) val));
15699   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15700 
15701   format %{ $$template
15702     if (UseFastStosb) {
15703        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15705        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15706     } else if (UseXMMForObjInit) {
15707        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15708        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15709        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15710        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15711        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15712        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15713        $$emit$$"add     0x40,rax\n\t"
15714        $$emit$$"# L_zero_64_bytes:\n\t"
15715        $$emit$$"sub     0x8,rcx\n\t"
15716        $$emit$$"jge     L_loop\n\t"
15717        $$emit$$"add     0x4,rcx\n\t"
15718        $$emit$$"jl      L_tail\n\t"
15719        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15720        $$emit$$"add     0x20,rax\n\t"
15721        $$emit$$"sub     0x4,rcx\n\t"
15722        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15723        $$emit$$"add     0x4,rcx\n\t"
15724        $$emit$$"jle     L_end\n\t"
15725        $$emit$$"dec     rcx\n\t"
15726        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15727        $$emit$$"vmovq   xmm0,(rax)\n\t"
15728        $$emit$$"add     0x8,rax\n\t"
15729        $$emit$$"dec     rcx\n\t"
15730        $$emit$$"jge     L_sloop\n\t"
15731        $$emit$$"# L_end:\n\t"
15732     } else {
15733        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15734        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15735     }
15736   %}
15737   ins_encode %{
15738     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15739                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15740   %}
15741   ins_pipe(pipe_slow);
15742 %}
15743 
15744 // Small constant length ClearArray for AVX512 targets.
15745 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15746 %{
15747   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15748             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15749   match(Set dummy (ClearArray (Binary cnt base) val));
15750   ins_cost(100);
15751   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt" %}
15753   ins_encode %{
15754     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15755   %}
15756   ins_pipe(pipe_slow);
15757 %}
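
// String and array intrinsics.  The LL/UU/LU/UL suffixes below name the encodings
// of the two string inputs (L = Latin-1 byte[], U = UTF-16 char[]).  The _evex
// variants require AVX-512 VL/BW and carry an opmask (kReg) temporary, while the
// others pass knoreg where the assembler routine takes a mask argument.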
15758 
15759 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15760                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15761 %{
15762   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15763   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15764   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15765 
15766   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15767   ins_encode %{
15768     __ string_compare($str1$$Register, $str2$$Register,
15769                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15770                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15771   %}
15772   ins_pipe( pipe_slow );
15773 %}
15774 
15775 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15776                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15777 %{
15778   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15779   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15780   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15781 
15782   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15783   ins_encode %{
15784     __ string_compare($str1$$Register, $str2$$Register,
15785                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15786                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15787   %}
15788   ins_pipe( pipe_slow );
15789 %}
15790 
15791 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15792                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15793 %{
15794   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15795   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15796   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15797 
15798   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15799   ins_encode %{
15800     __ string_compare($str1$$Register, $str2$$Register,
15801                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15802                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15803   %}
15804   ins_pipe( pipe_slow );
15805 %}
15806 
15807 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15808                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15809 %{
15810   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15811   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15812   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15813 
15814   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15815   ins_encode %{
15816     __ string_compare($str1$$Register, $str2$$Register,
15817                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15818                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15819   %}
15820   ins_pipe( pipe_slow );
15821 %}
15822 
15823 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15824                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15825 %{
15826   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15827   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15828   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15829 
15830   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15831   ins_encode %{
15832     __ string_compare($str1$$Register, $str2$$Register,
15833                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15834                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15835   %}
15836   ins_pipe( pipe_slow );
15837 %}
15838 
15839 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15840                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15841 %{
15842   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15843   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15844   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15845 
15846   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15847   ins_encode %{
15848     __ string_compare($str1$$Register, $str2$$Register,
15849                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15850                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15851   %}
15852   ins_pipe( pipe_slow );
15853 %}
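
// Note: in the UL rules the operand register classes are swapped relative to the
// other StrComp rules (str1 in RSI/RDX, str2 in RDI/RCX), and the call below swaps
// the arguments back, so string_compare always receives its first string in
// RDI/RCX and its second in RSI/RDX regardless of the encoding.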
15854 
15855 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15856                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15857 %{
15858   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15859   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15860   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15861 
15862   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15863   ins_encode %{
15864     __ string_compare($str2$$Register, $str1$$Register,
15865                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15866                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15867   %}
15868   ins_pipe( pipe_slow );
15869 %}
15870 
15871 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15872                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15873 %{
15874   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15875   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15876   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15877 
15878   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15879   ins_encode %{
15880     __ string_compare($str2$$Register, $str1$$Register,
15881                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15882                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15883   %}
15884   ins_pipe( pipe_slow );
15885 %}
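
// For the constant-size IndexOf rules below, string_indexofC8 is used once the
// constant needle fills at least one 16-byte SSE4.2 vector (16 elements in the
// Latin-1/Latin-1 case, 8 when UTF-16 characters are involved); such a needle
// never has to be copied through the stack to avoid reading across a page
// boundary.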
15886 
15887 // fast search of substring with known size.
15888 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15889                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15890 %{
15891   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15892   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15893   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15894 
15895   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15896   ins_encode %{
15897     int icnt2 = (int)$int_cnt2$$constant;
15898     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements,
      // which do not need to be loaded through the stack.
15901       __ string_indexofC8($str1$$Register, $str2$$Register,
15902                           $cnt1$$Register, $cnt2$$Register,
15903                           icnt2, $result$$Register,
15904                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15905     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15907       __ string_indexof($str1$$Register, $str2$$Register,
15908                         $cnt1$$Register, $cnt2$$Register,
15909                         icnt2, $result$$Register,
15910                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15911     }
15912   %}
15913   ins_pipe( pipe_slow );
15914 %}
15915 
15916 // fast search of substring with known size.
15917 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15918                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15919 %{
15920   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15921   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15922   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15923 
15924   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15925   ins_encode %{
15926     int icnt2 = (int)$int_cnt2$$constant;
15927     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which do not need to be loaded through the stack.
15930       __ string_indexofC8($str1$$Register, $str2$$Register,
15931                           $cnt1$$Register, $cnt2$$Register,
15932                           icnt2, $result$$Register,
15933                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15934     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15936       __ string_indexof($str1$$Register, $str2$$Register,
15937                         $cnt1$$Register, $cnt2$$Register,
15938                         icnt2, $result$$Register,
15939                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15940     }
15941   %}
15942   ins_pipe( pipe_slow );
15943 %}
15944 
15945 // fast search of substring with known size.
15946 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15947                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15948 %{
15949   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15950   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15951   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15952 
15953   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15954   ins_encode %{
15955     int icnt2 = (int)$int_cnt2$$constant;
15956     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which do not need to be loaded through the stack.
15959       __ string_indexofC8($str1$$Register, $str2$$Register,
15960                           $cnt1$$Register, $cnt2$$Register,
15961                           icnt2, $result$$Register,
15962                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15963     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15965       __ string_indexof($str1$$Register, $str2$$Register,
15966                         $cnt1$$Register, $cnt2$$Register,
15967                         icnt2, $result$$Register,
15968                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15969     }
15970   %}
15971   ins_pipe( pipe_slow );
15972 %}
15973 
15974 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15975                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15976 %{
15977   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15978   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15979   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15980 
15981   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15982   ins_encode %{
15983     __ string_indexof($str1$$Register, $str2$$Register,
15984                       $cnt1$$Register, $cnt2$$Register,
15985                       (-1), $result$$Register,
15986                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15987   %}
15988   ins_pipe( pipe_slow );
15989 %}
15990 
15991 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15992                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15993 %{
15994   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15995   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15996   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15997 
15998   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15999   ins_encode %{
16000     __ string_indexof($str1$$Register, $str2$$Register,
16001                       $cnt1$$Register, $cnt2$$Register,
16002                       (-1), $result$$Register,
16003                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
16004   %}
16005   ins_pipe( pipe_slow );
16006 %}
16007 
16008 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
16009                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
16010 %{
16011   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
16012   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16013   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
16014 
16015   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
16016   ins_encode %{
16017     __ string_indexof($str1$$Register, $str2$$Register,
16018                       $cnt1$$Register, $cnt2$$Register,
16019                       (-1), $result$$Register,
16020                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
16021   %}
16022   ins_pipe( pipe_slow );
16023 %}
16024 
16025 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16026                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16027 %{
16028   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
16029   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16030   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16031   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
16032   ins_encode %{
16033     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16034                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16035   %}
16036   ins_pipe( pipe_slow );
16037 %}
16038 
16039 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16040                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16041 %{
16042   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
16043   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16044   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16045   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
16046   ins_encode %{
16047     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16048                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16049   %}
16050   ins_pipe( pipe_slow );
16051 %}
16052 
16053 // fast string equals
16054 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16055                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16056 %{
16057   predicate(!VM_Version::supports_avx512vlbw());
16058   match(Set result (StrEquals (Binary str1 str2) cnt));
16059   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16060 
16061   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16062   ins_encode %{
16063     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16064                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16065                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16066   %}
16067   ins_pipe( pipe_slow );
16068 %}
16069 
16070 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16071                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
16072 %{
16073   predicate(VM_Version::supports_avx512vlbw());
16074   match(Set result (StrEquals (Binary str1 str2) cnt));
16075   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16076 
16077   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16078   ins_encode %{
16079     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16080                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16081                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16082   %}
16083   ins_pipe( pipe_slow );
16084 %}
16085 
16086 // fast array equals
16087 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16088                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16089 %{
16090   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16091   match(Set result (AryEq ary1 ary2));
16092   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16093 
16094   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16095   ins_encode %{
16096     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16097                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16098                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16099   %}
16100   ins_pipe( pipe_slow );
16101 %}
16102 
16103 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16104                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16105 %{
16106   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16107   match(Set result (AryEq ary1 ary2));
16108   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16109 
16110   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16111   ins_encode %{
16112     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16113                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16114                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16115   %}
16116   ins_pipe( pipe_slow );
16117 %}
16118 
16119 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16120                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16121 %{
16122   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16123   match(Set result (AryEq ary1 ary2));
16124   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16125 
16126   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16127   ins_encode %{
16128     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16129                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16130                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
16131   %}
16132   ins_pipe( pipe_slow );
16133 %}
16134 
16135 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16136                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16137 %{
16138   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16139   match(Set result (AryEq ary1 ary2));
16140   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16141 
16142   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16143   ins_encode %{
16144     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16145                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16146                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16147   %}
16148   ins_pipe( pipe_slow );
16149 %}
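
// Vectorized hash code (VectorizedHashCode): basic_type is an immediate BasicType
// constant selecting the element width the hash loop operates on, and result also
// feeds in the initial hash value.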
16150 
16151 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16152                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16153                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16154                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16155                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16156 %{
16157   predicate(UseAVX >= 2);
16158   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16159   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16160          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16161          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16162          USE basic_type, KILL cr);
16163 
16164   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
16165   ins_encode %{
16166     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16167                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16168                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16169                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16170                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16171                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16172                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16173   %}
16174   ins_pipe( pipe_slow );
16175 %}
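
// CountPositives: counts the leading non-negative bytes of a byte[] (presumably
// returning len when no byte has its sign bit set).  The _evex variant below also
// requires BMI2.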
16176 
16177 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16179 %{
16180   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16181   match(Set result (CountPositives ary1 len));
16182   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16183 
16184   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16185   ins_encode %{
16186     __ count_positives($ary1$$Register, $len$$Register,
16187                        $result$$Register, $tmp3$$Register,
16188                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16189   %}
16190   ins_pipe( pipe_slow );
16191 %}
16192 
16193 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16195 %{
16196   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16197   match(Set result (CountPositives ary1 len));
16198   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16199 
16200   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16201   ins_encode %{
16202     __ count_positives($ary1$$Register, $len$$Register,
16203                        $result$$Register, $tmp3$$Register,
16204                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16205   %}
16206   ins_pipe( pipe_slow );
16207 %}
16208 
16209 // fast char[] to byte[] compression
16210 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16211                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16212   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16213   match(Set result (StrCompressedCopy src (Binary dst len)));
16214   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16215          USE_KILL len, KILL tmp5, KILL cr);
16216 
16217   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16218   ins_encode %{
16219     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16220                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16221                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16222                            knoreg, knoreg);
16223   %}
16224   ins_pipe( pipe_slow );
16225 %}
16226 
16227 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16228                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16229   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16230   match(Set result (StrCompressedCopy src (Binary dst len)));
16231   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16232          USE_KILL len, KILL tmp5, KILL cr);
16233 
16234   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16235   ins_encode %{
16236     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16237                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16238                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16239                            $ktmp1$$KRegister, $ktmp2$$KRegister);
16240   %}
16241   ins_pipe( pipe_slow );
16242 %}

// fast byte[] to char[] inflation
16244 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16245                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16246   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16247   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16248   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16249 
16250   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16251   ins_encode %{
16252     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16253                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16254   %}
16255   ins_pipe( pipe_slow );
16256 %}
16257 
16258 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16259                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16260   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16261   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16262   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16263 
16264   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16265   ins_encode %{
16266     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16267                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16268   %}
16269   ins_pipe( pipe_slow );
16270 %}
16271 
16272 // encode char[] to byte[] in ISO_8859_1
16273 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16274                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16275                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16276   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16277   match(Set result (EncodeISOArray src (Binary dst len)));
16278   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16279 
16280   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16281   ins_encode %{
16282     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16283                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16284                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16285   %}
16286   ins_pipe( pipe_slow );
16287 %}
16288 
16289 // encode char[] to byte[] in ASCII
16290 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16291                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16292                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16293   predicate(((EncodeISOArrayNode*)n)->is_ascii());
16294   match(Set result (EncodeISOArray src (Binary dst len)));
16295   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16296 
16297   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16298   ins_encode %{
16299     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16300                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16301                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16302   %}
16303   ins_pipe( pipe_slow );
16304 %}
16305 
16306 //----------Overflow Math Instructions-----------------------------------------
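// These rules match the OverflowAdd/Sub/Mul ideal nodes (used, for example, when
// intrinsifying Math.addExact and friends).  The arithmetic is performed only for
// its effect on the condition codes; the matched branch then tests the overflow
// condition.  Operands clobbered by the arithmetic are declared USE_KILL.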
16307 
16308 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16309 %{
16310   match(Set cr (OverflowAddI op1 op2));
16311   effect(DEF cr, USE_KILL op1, USE op2);
16312 
16313   format %{ "addl    $op1, $op2\t# overflow check int" %}
16314 
16315   ins_encode %{
16316     __ addl($op1$$Register, $op2$$Register);
16317   %}
16318   ins_pipe(ialu_reg_reg);
16319 %}
16320 
16321 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16322 %{
16323   match(Set cr (OverflowAddI op1 op2));
16324   effect(DEF cr, USE_KILL op1, USE op2);
16325 
16326   format %{ "addl    $op1, $op2\t# overflow check int" %}
16327 
16328   ins_encode %{
16329     __ addl($op1$$Register, $op2$$constant);
16330   %}
16331   ins_pipe(ialu_reg_reg);
16332 %}
16333 
16334 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16335 %{
16336   match(Set cr (OverflowAddL op1 op2));
16337   effect(DEF cr, USE_KILL op1, USE op2);
16338 
16339   format %{ "addq    $op1, $op2\t# overflow check long" %}
16340   ins_encode %{
16341     __ addq($op1$$Register, $op2$$Register);
16342   %}
16343   ins_pipe(ialu_reg_reg);
16344 %}
16345 
16346 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16347 %{
16348   match(Set cr (OverflowAddL op1 op2));
16349   effect(DEF cr, USE_KILL op1, USE op2);
16350 
16351   format %{ "addq    $op1, $op2\t# overflow check long" %}
16352   ins_encode %{
16353     __ addq($op1$$Register, $op2$$constant);
16354   %}
16355   ins_pipe(ialu_reg_reg);
16356 %}
16357 
16358 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16359 %{
16360   match(Set cr (OverflowSubI op1 op2));
16361 
16362   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16363   ins_encode %{
16364     __ cmpl($op1$$Register, $op2$$Register);
16365   %}
16366   ins_pipe(ialu_reg_reg);
16367 %}
16368 
16369 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16370 %{
16371   match(Set cr (OverflowSubI op1 op2));
16372 
16373   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16374   ins_encode %{
16375     __ cmpl($op1$$Register, $op2$$constant);
16376   %}
16377   ins_pipe(ialu_reg_reg);
16378 %}
16379 
16380 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16381 %{
16382   match(Set cr (OverflowSubL op1 op2));
16383 
16384   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16385   ins_encode %{
16386     __ cmpq($op1$$Register, $op2$$Register);
16387   %}
16388   ins_pipe(ialu_reg_reg);
16389 %}
16390 
16391 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16392 %{
16393   match(Set cr (OverflowSubL op1 op2));
16394 
16395   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16396   ins_encode %{
16397     __ cmpq($op1$$Register, $op2$$constant);
16398   %}
16399   ins_pipe(ialu_reg_reg);
16400 %}
16401 
16402 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16403 %{
16404   match(Set cr (OverflowSubI zero op2));
16405   effect(DEF cr, USE_KILL op2);
16406 
16407   format %{ "negl    $op2\t# overflow check int" %}
16408   ins_encode %{
16409     __ negl($op2$$Register);
16410   %}
16411   ins_pipe(ialu_reg_reg);
16412 %}
16413 
16414 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16415 %{
16416   match(Set cr (OverflowSubL zero op2));
16417   effect(DEF cr, USE_KILL op2);
16418 
16419   format %{ "negq    $op2\t# overflow check long" %}
16420   ins_encode %{
16421     __ negq($op2$$Register);
16422   %}
16423   ins_pipe(ialu_reg_reg);
16424 %}
16425 
16426 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16427 %{
16428   match(Set cr (OverflowMulI op1 op2));
16429   effect(DEF cr, USE_KILL op1, USE op2);
16430 
16431   format %{ "imull    $op1, $op2\t# overflow check int" %}
16432   ins_encode %{
16433     __ imull($op1$$Register, $op2$$Register);
16434   %}
16435   ins_pipe(ialu_reg_reg_alu0);
16436 %}
16437 
16438 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16439 %{
16440   match(Set cr (OverflowMulI op1 op2));
16441   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16442 
16443   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16444   ins_encode %{
16445     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16446   %}
16447   ins_pipe(ialu_reg_reg_alu0);
16448 %}
16449 
16450 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16451 %{
16452   match(Set cr (OverflowMulL op1 op2));
16453   effect(DEF cr, USE_KILL op1, USE op2);
16454 
16455   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16456   ins_encode %{
16457     __ imulq($op1$$Register, $op2$$Register);
16458   %}
16459   ins_pipe(ialu_reg_reg_alu0);
16460 %}
16461 
16462 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16463 %{
16464   match(Set cr (OverflowMulL op1 op2));
16465   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16466 
16467   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16468   ins_encode %{
16469     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16470   %}
16471   ins_pipe(ialu_reg_reg_alu0);
16472 %}
16473 
16474 
16475 //----------Control Flow Instructions------------------------------------------
16476 // Signed compare Instructions
16477 
16478 // XXX more variants!!
16479 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16480 %{
16481   match(Set cr (CmpI op1 op2));
16482   effect(DEF cr, USE op1, USE op2);
16483 
16484   format %{ "cmpl    $op1, $op2" %}
16485   ins_encode %{
16486     __ cmpl($op1$$Register, $op2$$Register);
16487   %}
16488   ins_pipe(ialu_cr_reg_reg);
16489 %}
16490 
16491 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16492 %{
16493   match(Set cr (CmpI op1 op2));
16494 
16495   format %{ "cmpl    $op1, $op2" %}
16496   ins_encode %{
16497     __ cmpl($op1$$Register, $op2$$constant);
16498   %}
16499   ins_pipe(ialu_cr_reg_imm);
16500 %}
16501 
16502 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16503 %{
16504   match(Set cr (CmpI op1 (LoadI op2)));
16505 
16506   ins_cost(500); // XXX
16507   format %{ "cmpl    $op1, $op2" %}
16508   ins_encode %{
16509     __ cmpl($op1$$Register, $op2$$Address);
16510   %}
16511   ins_pipe(ialu_cr_reg_mem);
16512 %}
16513 
16514 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16515 %{
16516   match(Set cr (CmpI src zero));
16517 
16518   format %{ "testl   $src, $src" %}
16519   ins_encode %{
16520     __ testl($src$$Register, $src$$Register);
16521   %}
16522   ins_pipe(ialu_cr_reg_imm);
16523 %}
16524 
16525 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16526 %{
16527   match(Set cr (CmpI (AndI src con) zero));
16528 
16529   format %{ "testl   $src, $con" %}
16530   ins_encode %{
16531     __ testl($src$$Register, $con$$constant);
16532   %}
16533   ins_pipe(ialu_cr_reg_imm);
16534 %}
16535 
16536 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16537 %{
16538   match(Set cr (CmpI (AndI src1 src2) zero));
16539 
16540   format %{ "testl   $src1, $src2" %}
16541   ins_encode %{
16542     __ testl($src1$$Register, $src2$$Register);
16543   %}
16544   ins_pipe(ialu_cr_reg_imm);
16545 %}
16546 
16547 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16548 %{
16549   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16550 
16551   format %{ "testl   $src, $mem" %}
16552   ins_encode %{
16553     __ testl($src$$Register, $mem$$Address);
16554   %}
16555   ins_pipe(ialu_cr_reg_mem);
16556 %}
16557 
16558 // Unsigned compare Instructions; really, same as signed except they
16559 // produce an rFlagsRegU instead of rFlagsReg.
16560 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16561 %{
16562   match(Set cr (CmpU op1 op2));
16563 
16564   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16565   ins_encode %{
16566     __ cmpl($op1$$Register, $op2$$Register);
16567   %}
16568   ins_pipe(ialu_cr_reg_reg);
16569 %}
16570 
16571 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16572 %{
16573   match(Set cr (CmpU op1 op2));
16574 
16575   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16576   ins_encode %{
16577     __ cmpl($op1$$Register, $op2$$constant);
16578   %}
16579   ins_pipe(ialu_cr_reg_imm);
16580 %}
16581 
16582 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16583 %{
16584   match(Set cr (CmpU op1 (LoadI op2)));
16585 
16586   ins_cost(500); // XXX
16587   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16588   ins_encode %{
16589     __ cmpl($op1$$Register, $op2$$Address);
16590   %}
16591   ins_pipe(ialu_cr_reg_mem);
16592 %}
16593 
16594 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16595 %{
16596   match(Set cr (CmpU src zero));
16597 
16598   format %{ "testl   $src, $src\t# unsigned" %}
16599   ins_encode %{
16600     __ testl($src$$Register, $src$$Register);
16601   %}
16602   ins_pipe(ialu_cr_reg_imm);
16603 %}
16604 
16605 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16606 %{
16607   match(Set cr (CmpP op1 op2));
16608 
16609   format %{ "cmpq    $op1, $op2\t# ptr" %}
16610   ins_encode %{
16611     __ cmpq($op1$$Register, $op2$$Register);
16612   %}
16613   ins_pipe(ialu_cr_reg_reg);
16614 %}
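
// The memory forms below are restricted to loads without GC barrier data
// (barrier_data() == 0), so that a pointer load which still needs a barrier is
// never folded into the compare.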
16615 
16616 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16617 %{
16618   match(Set cr (CmpP op1 (LoadP op2)));
16619   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16620 
16621   ins_cost(500); // XXX
16622   format %{ "cmpq    $op1, $op2\t# ptr" %}
16623   ins_encode %{
16624     __ cmpq($op1$$Register, $op2$$Address);
16625   %}
16626   ins_pipe(ialu_cr_reg_mem);
16627 %}
16628 
16629 // XXX this is generalized by compP_rReg_mem???
16630 // Compare raw pointer (used in out-of-heap check).
16631 // Only works because non-oop pointers must be raw pointers
16632 // and raw pointers have no anti-dependencies.
16633 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16634 %{
16635   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16636             n->in(2)->as_Load()->barrier_data() == 0);
16637   match(Set cr (CmpP op1 (LoadP op2)));
16638 
16639   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16640   ins_encode %{
16641     __ cmpq($op1$$Register, $op2$$Address);
16642   %}
16643   ins_pipe(ialu_cr_reg_mem);
16644 %}
16645 
16646 // This will generate a signed flags result. This should be OK since
16647 // any compare to a zero should be eq/neq.
16648 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16649 %{
16650   match(Set cr (CmpP src zero));
16651 
16652   format %{ "testq   $src, $src\t# ptr" %}
16653   ins_encode %{
16654     __ testq($src$$Register, $src$$Register);
16655   %}
16656   ins_pipe(ialu_cr_reg_imm);
16657 %}
16658 
16659 // This will generate a signed flags result. This should be OK since
16660 // any compare to a zero should be eq/neq.
16661 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16662 %{
16663   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16664             n->in(1)->as_Load()->barrier_data() == 0);
16665   match(Set cr (CmpP (LoadP op) zero));
16666 
16667   ins_cost(500); // XXX
16668   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16669   ins_encode %{
16670     __ testq($op$$Address, 0xFFFFFFFF);
16671   %}
16672   ins_pipe(ialu_cr_reg_imm);
16673 %}
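
// When the compressed-oop base is null, R12 (the heap-base register) holds zero,
// so comparing the memory operand against R12 is a test against null with a more
// compact encoding than a memory-immediate compare.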
16674 
16675 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16676 %{
16677   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16678             n->in(1)->as_Load()->barrier_data() == 0);
16679   match(Set cr (CmpP (LoadP mem) zero));
16680 
16681   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16682   ins_encode %{
16683     __ cmpq(r12, $mem$$Address);
16684   %}
16685   ins_pipe(ialu_cr_reg_mem);
16686 %}
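
// Note for the rule above: when compressed oops use a zero (null) base, R12
// holds the heap base and therefore contains zero, so comparing it against the
// memory operand is a compact way to test the stored pointer for null. The
// testN_mem_reg0 rule below uses the same trick for narrow oops.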
16687 
16688 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16689 %{
16690   match(Set cr (CmpN op1 op2));
16691 
16692   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16693   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16694   ins_pipe(ialu_cr_reg_reg);
16695 %}
16696 
16697 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16698 %{
16699   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16700   match(Set cr (CmpN src (LoadN mem)));
16701 
16702   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16703   ins_encode %{
16704     __ cmpl($src$$Register, $mem$$Address);
16705   %}
16706   ins_pipe(ialu_cr_reg_mem);
16707 %}
16708 
16709 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16710   match(Set cr (CmpN op1 op2));
16711 
16712   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16713   ins_encode %{
16714     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16715   %}
16716   ins_pipe(ialu_cr_reg_imm);
16717 %}
16718 
16719 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16720 %{
16721   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16722   match(Set cr (CmpN src (LoadN mem)));
16723 
16724   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16725   ins_encode %{
16726     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16727   %}
16728   ins_pipe(ialu_cr_reg_mem);
16729 %}
16730 
16731 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16732   match(Set cr (CmpN op1 op2));
16733 
16734   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16735   ins_encode %{
16736     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16737   %}
16738   ins_pipe(ialu_cr_reg_imm);
16739 %}
16740 
16741 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16742 %{
16743   predicate(!UseCompactObjectHeaders);
16744   match(Set cr (CmpN src (LoadNKlass mem)));
16745 
16746   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16747   ins_encode %{
16748     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16749   %}
16750   ins_pipe(ialu_cr_reg_mem);
16751 %}
16752 
16753 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16754   match(Set cr (CmpN src zero));
16755 
16756   format %{ "testl   $src, $src\t# compressed ptr" %}
16757   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16758   ins_pipe(ialu_cr_reg_imm);
16759 %}
16760 
16761 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16762 %{
16763   predicate(CompressedOops::base() != nullptr &&
16764             n->in(1)->as_Load()->barrier_data() == 0);
16765   match(Set cr (CmpN (LoadN mem) zero));
16766 
16767   ins_cost(500); // XXX
16768   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16769   ins_encode %{
16770     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16771   %}
16772   ins_pipe(ialu_cr_reg_mem);
16773 %}
16774 
16775 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16776 %{
16777   predicate(CompressedOops::base() == nullptr &&
16778             n->in(1)->as_Load()->barrier_data() == 0);
16779   match(Set cr (CmpN (LoadN mem) zero));
16780 
16781   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16782   ins_encode %{
16783     __ cmpl(r12, $mem$$Address);
16784   %}
16785   ins_pipe(ialu_cr_reg_mem);
16786 %}
16787 
16788 // Yanked all unsigned pointer compare operations.
16789 // Pointer compares are done with CmpP which is already unsigned.
16790 
16791 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16792 %{
16793   match(Set cr (CmpL op1 op2));
16794 
16795   format %{ "cmpq    $op1, $op2" %}
16796   ins_encode %{
16797     __ cmpq($op1$$Register, $op2$$Register);
16798   %}
16799   ins_pipe(ialu_cr_reg_reg);
16800 %}
16801 
16802 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16803 %{
16804   match(Set cr (CmpL op1 op2));
16805 
16806   format %{ "cmpq    $op1, $op2" %}
16807   ins_encode %{
16808     __ cmpq($op1$$Register, $op2$$constant);
16809   %}
16810   ins_pipe(ialu_cr_reg_imm);
16811 %}
16812 
16813 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16814 %{
16815   match(Set cr (CmpL op1 (LoadL op2)));
16816 
16817   format %{ "cmpq    $op1, $op2" %}
16818   ins_encode %{
16819     __ cmpq($op1$$Register, $op2$$Address);
16820   %}
16821   ins_pipe(ialu_cr_reg_mem);
16822 %}
16823 
16824 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16825 %{
16826   match(Set cr (CmpL src zero));
16827 
16828   format %{ "testq   $src, $src" %}
16829   ins_encode %{
16830     __ testq($src$$Register, $src$$Register);
16831   %}
16832   ins_pipe(ialu_cr_reg_imm);
16833 %}
16834 
16835 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16836 %{
16837   match(Set cr (CmpL (AndL src con) zero));
16838 
16839   format %{ "testq   $src, $con\t# long" %}
16840   ins_encode %{
16841     __ testq($src$$Register, $con$$constant);
16842   %}
16843   ins_pipe(ialu_cr_reg_imm);
16844 %}
16845 
16846 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16847 %{
16848   match(Set cr (CmpL (AndL src1 src2) zero));
16849 
16850   format %{ "testq   $src1, $src2\t# long" %}
16851   ins_encode %{
16852     __ testq($src1$$Register, $src2$$Register);
16853   %}
16854   ins_pipe(ialu_cr_reg_imm);
16855 %}
16856 
16857 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16858 %{
16859   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16860 
16861   format %{ "testq   $src, $mem" %}
16862   ins_encode %{
16863     __ testq($src$$Register, $mem$$Address);
16864   %}
16865   ins_pipe(ialu_cr_reg_mem);
16866 %}
16867 
16868 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16869 %{
16870   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16871 
16872   format %{ "testq   $src, $mem" %}
16873   ins_encode %{
16874     __ testq($src$$Register, $mem$$Address);
16875   %}
16876   ins_pipe(ialu_cr_reg_mem);
16877 %}
16878 
16879 // Manifest a CmpU result in an integer register.  Very painful.
16880 // This is the test to avoid.
16881 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16882 %{
16883   match(Set dst (CmpU3 src1 src2));
16884   effect(KILL flags);
16885 
16886   ins_cost(275); // XXX
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16888             "movl    $dst, -1\n\t"
16889             "jb,u    done\n\t"
16890             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16891     "done:" %}
16892   ins_encode %{
16893     Label done;
16894     __ cmpl($src1$$Register, $src2$$Register);
16895     __ movl($dst$$Register, -1);
16896     __ jccb(Assembler::below, done);
16897     __ setcc(Assembler::notZero, $dst$$Register);
16898     __ bind(done);
16899   %}
16900   ins_pipe(pipe_slow);
16901 %}
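
// The rule above materializes the three-way result as: dst = -1 when
// src1 <u src2 (the early jb), otherwise setcc(notZero) leaves 0 for equal
// inputs and 1 when src1 >u src2. The CmpL3 and CmpUL3 rules below use the
// same idiom with signed and unsigned 64-bit compares.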
16902 
16903 // Manifest a CmpL result in an integer register.  Very painful.
16904 // This is the test to avoid.
16905 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16906 %{
16907   match(Set dst (CmpL3 src1 src2));
16908   effect(KILL flags);
16909 
16910   ins_cost(275); // XXX
16911   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16912             "movl    $dst, -1\n\t"
16913             "jl,s    done\n\t"
16914             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16915     "done:" %}
16916   ins_encode %{
16917     Label done;
16918     __ cmpq($src1$$Register, $src2$$Register);
16919     __ movl($dst$$Register, -1);
16920     __ jccb(Assembler::less, done);
16921     __ setcc(Assembler::notZero, $dst$$Register);
16922     __ bind(done);
16923   %}
16924   ins_pipe(pipe_slow);
16925 %}
16926 
16927 // Manifest a CmpUL result in an integer register.  Very painful.
16928 // This is the test to avoid.
16929 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16930 %{
16931   match(Set dst (CmpUL3 src1 src2));
16932   effect(KILL flags);
16933 
16934   ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16936             "movl    $dst, -1\n\t"
16937             "jb,u    done\n\t"
16938             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16939     "done:" %}
16940   ins_encode %{
16941     Label done;
16942     __ cmpq($src1$$Register, $src2$$Register);
16943     __ movl($dst$$Register, -1);
16944     __ jccb(Assembler::below, done);
16945     __ setcc(Assembler::notZero, $dst$$Register);
16946     __ bind(done);
16947   %}
16948   ins_pipe(pipe_slow);
16949 %}
16950 
// Unsigned long compare instructions; the same as the signed long compares
// except that they produce an rFlagsRegU instead of an rFlagsReg.
16953 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16954 %{
16955   match(Set cr (CmpUL op1 op2));
16956 
16957   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16958   ins_encode %{
16959     __ cmpq($op1$$Register, $op2$$Register);
16960   %}
16961   ins_pipe(ialu_cr_reg_reg);
16962 %}
16963 
16964 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16965 %{
16966   match(Set cr (CmpUL op1 op2));
16967 
16968   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16969   ins_encode %{
16970     __ cmpq($op1$$Register, $op2$$constant);
16971   %}
16972   ins_pipe(ialu_cr_reg_imm);
16973 %}
16974 
16975 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16976 %{
16977   match(Set cr (CmpUL op1 (LoadL op2)));
16978 
16979   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16980   ins_encode %{
16981     __ cmpq($op1$$Register, $op2$$Address);
16982   %}
16983   ins_pipe(ialu_cr_reg_mem);
16984 %}
16985 
16986 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16987 %{
16988   match(Set cr (CmpUL src zero));
16989 
16990   format %{ "testq   $src, $src\t# unsigned" %}
16991   ins_encode %{
16992     __ testq($src$$Register, $src$$Register);
16993   %}
16994   ins_pipe(ialu_cr_reg_imm);
16995 %}
16996 
16997 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16998 %{
16999   match(Set cr (CmpI (LoadB mem) imm));
17000 
17001   ins_cost(125);
17002   format %{ "cmpb    $mem, $imm" %}
17003   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
17004   ins_pipe(ialu_cr_reg_mem);
17005 %}
17006 
17007 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
17008 %{
17009   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
17010 
17011   ins_cost(125);
17012   format %{ "testb   $mem, $imm\t# ubyte" %}
17013   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
17014   ins_pipe(ialu_cr_reg_mem);
17015 %}
17016 
17017 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
17018 %{
17019   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
17020 
17021   ins_cost(125);
17022   format %{ "testb   $mem, $imm\t# byte" %}
17023   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
17024   ins_pipe(ialu_cr_reg_mem);
17025 %}
17026 
17027 //----------Max and Min--------------------------------------------------------
17028 // Min Instructions
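// x86 has no scalar integer min/max instruction, so MinI and MaxI are expanded
// below into a compare followed by a conditional move: cmovg replaces dst with
// src when dst is greater (yielding the minimum), and cmovl does the converse
// for the maximum.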
17029 
17030 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
17031 %{
17032   predicate(!UseAPX);
17033   effect(USE_DEF dst, USE src, USE cr);
17034 
17035   format %{ "cmovlgt $dst, $src\t# min" %}
17036   ins_encode %{
17037     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
17038   %}
17039   ins_pipe(pipe_cmov_reg);
17040 %}
17041 
17042 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17043 %{
17044   predicate(UseAPX);
17045   effect(DEF dst, USE src1, USE src2, USE cr);
17046 
17047   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
17048   ins_encode %{
17049     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
17050   %}
17051   ins_pipe(pipe_cmov_reg);
17052 %}
17053 
17054 instruct minI_rReg(rRegI dst, rRegI src)
17055 %{
17056   predicate(!UseAPX);
17057   match(Set dst (MinI dst src));
17058 
17059   ins_cost(200);
17060   expand %{
17061     rFlagsReg cr;
17062     compI_rReg(cr, dst, src);
17063     cmovI_reg_g(dst, src, cr);
17064   %}
17065 %}
17066 
17067 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17068 %{
17069   predicate(UseAPX);
17070   match(Set dst (MinI src1 src2));
17071   effect(DEF dst, USE src1, USE src2);
17072   flag(PD::Flag_ndd_demotable_opr1);
17073 
17074   ins_cost(200);
17075   expand %{
17076     rFlagsReg cr;
17077     compI_rReg(cr, src1, src2);
17078     cmovI_reg_g_ndd(dst, src1, src2, cr);
17079   %}
17080 %}
17081 
17082 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
17083 %{
17084   predicate(!UseAPX);
17085   effect(USE_DEF dst, USE src, USE cr);
17086 
17087   format %{ "cmovllt $dst, $src\t# max" %}
17088   ins_encode %{
17089     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
17090   %}
17091   ins_pipe(pipe_cmov_reg);
17092 %}
17093 
17094 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17095 %{
17096   predicate(UseAPX);
17097   effect(DEF dst, USE src1, USE src2, USE cr);
17098 
17099   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
17100   ins_encode %{
17101     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
17102   %}
17103   ins_pipe(pipe_cmov_reg);
17104 %}
17105 
17106 instruct maxI_rReg(rRegI dst, rRegI src)
17107 %{
17108   predicate(!UseAPX);
17109   match(Set dst (MaxI dst src));
17110 
17111   ins_cost(200);
17112   expand %{
17113     rFlagsReg cr;
17114     compI_rReg(cr, dst, src);
17115     cmovI_reg_l(dst, src, cr);
17116   %}
17117 %}
17118 
17119 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17120 %{
17121   predicate(UseAPX);
17122   match(Set dst (MaxI src1 src2));
17123   effect(DEF dst, USE src1, USE src2);
17124   flag(PD::Flag_ndd_demotable_opr1);
17125 
17126   ins_cost(200);
17127   expand %{
17128     rFlagsReg cr;
17129     compI_rReg(cr, src1, src2);
17130     cmovI_reg_l_ndd(dst, src1, src2, cr);
17131   %}
17132 %}
17133 
17134 // ============================================================================
17135 // Branch Instructions
17136 
17137 // Jump Direct - Label defines a relative address from JMP+1
17138 instruct jmpDir(label labl)
17139 %{
17140   match(Goto);
17141   effect(USE labl);
17142 
17143   ins_cost(300);
17144   format %{ "jmp     $labl" %}
17145   size(5);
17146   ins_encode %{
17147     Label* L = $labl$$label;
17148     __ jmp(*L, false); // Always long jump
17149   %}
17150   ins_pipe(pipe_jmp);
17151 %}
17152 
17153 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17154 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17155 %{
17156   match(If cop cr);
17157   effect(USE labl);
17158 
17159   ins_cost(300);
17160   format %{ "j$cop     $labl" %}
17161   size(6);
17162   ins_encode %{
17163     Label* L = $labl$$label;
17164     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17165   %}
17166   ins_pipe(pipe_jcc);
17167 %}
17168 
17169 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17170 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17171 %{
17172   match(CountedLoopEnd cop cr);
17173   effect(USE labl);
17174 
17175   ins_cost(300);
17176   format %{ "j$cop     $labl\t# loop end" %}
17177   size(6);
17178   ins_encode %{
17179     Label* L = $labl$$label;
17180     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17181   %}
17182   ins_pipe(pipe_jcc);
17183 %}
17184 
17185 // Jump Direct Conditional - using unsigned comparison
17186 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17187   match(If cop cmp);
17188   effect(USE labl);
17189 
17190   ins_cost(300);
17191   format %{ "j$cop,u   $labl" %}
17192   size(6);
17193   ins_encode %{
17194     Label* L = $labl$$label;
17195     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17196   %}
17197   ins_pipe(pipe_jcc);
17198 %}
17199 
17200 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17201   match(If cop cmp);
17202   effect(USE labl);
17203 
17204   ins_cost(200);
17205   format %{ "j$cop,u   $labl" %}
17206   size(6);
17207   ins_encode %{
17208     Label* L = $labl$$label;
17209     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17210   %}
17211   ins_pipe(pipe_jcc);
17212 %}
17213 
17214 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17215   match(If cop cmp);
17216   effect(USE labl);
17217 
17218   ins_cost(200);
17219   format %{ $$template
17220     if ($cop$$cmpcode == Assembler::notEqual) {
17221       $$emit$$"jp,u    $labl\n\t"
17222       $$emit$$"j$cop,u   $labl"
17223     } else {
17224       $$emit$$"jp,u    done\n\t"
17225       $$emit$$"j$cop,u   $labl\n\t"
17226       $$emit$$"done:"
17227     }
17228   %}
17229   ins_encode %{
17230     Label* l = $labl$$label;
17231     if ($cop$$cmpcode == Assembler::notEqual) {
17232       __ jcc(Assembler::parity, *l, false);
17233       __ jcc(Assembler::notEqual, *l, false);
17234     } else if ($cop$$cmpcode == Assembler::equal) {
17235       Label done;
17236       __ jccb(Assembler::parity, done);
17237       __ jcc(Assembler::equal, *l, false);
17238       __ bind(done);
17239     } else {
17240        ShouldNotReachHere();
17241     }
17242   %}
17243   ins_pipe(pipe_jcc);
17244 %}
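
// Note: rFlagsRegUCF values typically come from unordered floating-point
// compares, which set the parity flag when an operand is NaN. The extra jp in
// the rule above routes the unordered case to the not-equal outcome, so NaN
// never compares equal.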
17245 
17246 // Jump Direct Conditional - using signed and unsigned comparison
17247 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17248   match(If cop cmp);
17249   effect(USE labl);
17250 
17251   ins_cost(200);
17252   format %{ "j$cop,su   $labl" %}
17253   size(6);
17254   ins_encode %{
17255     Label* L = $labl$$label;
17256     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17257   %}
17258   ins_pipe(pipe_jcc);
17259 %}
17260 
17261 // ============================================================================
17262 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
17263 // superklass array for an instance of the superklass.  Set a hidden
17264 // internal cache on a hit (cache is checked with exposed code in
17265 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
17266 // encoding ALSO sets flags.
17267 
17268 instruct partialSubtypeCheck(rdi_RegP result,
17269                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17270                              rFlagsReg cr)
17271 %{
17272   match(Set result (PartialSubtypeCheck sub super));
17273   predicate(!UseSecondarySupersTable);
17274   effect(KILL rcx, KILL cr);
17275 
17276   ins_cost(1100);  // slightly larger than the next version
17277   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17278             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17279             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17280             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17281             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
17282             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
17284     "miss:\t" %}
17285 
17286   ins_encode %{
17287     Label miss;
17288     // NB: Callers may assume that, when $result is a valid register,
17289     // check_klass_subtype_slow_path_linear sets it to a nonzero
17290     // value.
17291     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17292                                             $rcx$$Register, $result$$Register,
17293                                             nullptr, &miss,
17294                                             /*set_cond_codes:*/ true);
17295     __ xorptr($result$$Register, $result$$Register);
17296     __ bind(miss);
17297   %}
17298 
17299   ins_pipe(pipe_slow);
17300 %}
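
// Rough sketch of the slow path above (illustrative pseudocode only; field and
// accessor names are approximate):
//
//   Klass** s = sub->secondary_supers()->data();
//   for (int i = 0; i < sub->secondary_supers()->length(); i++) {  // repne scasq
//     if (s[i] == super) {
//       sub->_secondary_super_cache = super;  // hit: update the hidden cache
//       return 0;                             // zero result, flags EQ
//     }
//   }
//   return non_zero;                          // miss, flags NZ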
17301 
17302 // ============================================================================
17303 // Two versions of hashtable-based partialSubtypeCheck, both used when
17304 // we need to search for a super class in the secondary supers array.
17305 // The first is used when we don't know _a priori_ the class being
17306 // searched for. The second, far more common, is used when we do know:
17307 // this is used for instanceof, checkcast, and any case where C2 can
17308 // determine it by constant propagation.
17309 
17310 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17311                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17312                                        rFlagsReg cr)
17313 %{
17314   match(Set result (PartialSubtypeCheck sub super));
17315   predicate(UseSecondarySupersTable);
17316   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17317 
17318   ins_cost(1000);
17319   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17320 
17321   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17324   %}
17325 
17326   ins_pipe(pipe_slow);
17327 %}
17328 
17329 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17330                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17331                                        rFlagsReg cr)
17332 %{
17333   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17334   predicate(UseSecondarySupersTable);
17335   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17336 
17337   ins_cost(700);  // smaller than the next version
17338   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17339 
17340   ins_encode %{
17341     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17342     if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
17346     } else {
17347       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17348     }
17349   %}
17350 
17351   ins_pipe(pipe_slow);
17352 %}
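
// In the constant-super case above the hash slot is computed at compile time,
// so the lookup is either inlined (InlineSecondarySupersTest) or reduced to a
// call to the per-slot stub provided by StubRoutines.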
17353 
17354 // ============================================================================
17355 // Branch Instructions -- short offset versions
17356 //
17357 // These instructions are used to replace jumps of a long offset (the default
17358 // match) with jumps of a shorter offset.  These instructions are all tagged
17359 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17360 // match rules in general matching.  Instead, the ADLC generates a conversion
17361 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  Whether the short form can be used
// is decided by the is_short_branch_offset() predicate in the machine-specific
// code section of the file.
17365 
17366 // Jump Direct - Label defines a relative address from JMP+1
17367 instruct jmpDir_short(label labl) %{
17368   match(Goto);
17369   effect(USE labl);
17370 
17371   ins_cost(300);
17372   format %{ "jmp,s   $labl" %}
17373   size(2);
17374   ins_encode %{
17375     Label* L = $labl$$label;
17376     __ jmpb(*L);
17377   %}
17378   ins_pipe(pipe_jmp);
17379   ins_short_branch(1);
17380 %}
17381 
17382 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17383 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17384   match(If cop cr);
17385   effect(USE labl);
17386 
17387   ins_cost(300);
17388   format %{ "j$cop,s   $labl" %}
17389   size(2);
17390   ins_encode %{
17391     Label* L = $labl$$label;
17392     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17393   %}
17394   ins_pipe(pipe_jcc);
17395   ins_short_branch(1);
17396 %}
17397 
17398 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17399 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17400   match(CountedLoopEnd cop cr);
17401   effect(USE labl);
17402 
17403   ins_cost(300);
17404   format %{ "j$cop,s   $labl\t# loop end" %}
17405   size(2);
17406   ins_encode %{
17407     Label* L = $labl$$label;
17408     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17409   %}
17410   ins_pipe(pipe_jcc);
17411   ins_short_branch(1);
17412 %}
17413 
17414 // Jump Direct Conditional - using unsigned comparison
17415 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17416   match(If cop cmp);
17417   effect(USE labl);
17418 
17419   ins_cost(300);
17420   format %{ "j$cop,us  $labl" %}
17421   size(2);
17422   ins_encode %{
17423     Label* L = $labl$$label;
17424     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17425   %}
17426   ins_pipe(pipe_jcc);
17427   ins_short_branch(1);
17428 %}
17429 
17430 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17431   match(If cop cmp);
17432   effect(USE labl);
17433 
17434   ins_cost(300);
17435   format %{ "j$cop,us  $labl" %}
17436   size(2);
17437   ins_encode %{
17438     Label* L = $labl$$label;
17439     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17440   %}
17441   ins_pipe(pipe_jcc);
17442   ins_short_branch(1);
17443 %}
17444 
17445 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17446   match(If cop cmp);
17447   effect(USE labl);
17448 
17449   ins_cost(300);
17450   format %{ $$template
17451     if ($cop$$cmpcode == Assembler::notEqual) {
17452       $$emit$$"jp,u,s  $labl\n\t"
17453       $$emit$$"j$cop,u,s  $labl"
17454     } else {
17455       $$emit$$"jp,u,s  done\n\t"
17456       $$emit$$"j$cop,u,s  $labl\n\t"
17457       $$emit$$"done:"
17458     }
17459   %}
17460   size(4);
17461   ins_encode %{
17462     Label* l = $labl$$label;
17463     if ($cop$$cmpcode == Assembler::notEqual) {
17464       __ jccb(Assembler::parity, *l);
17465       __ jccb(Assembler::notEqual, *l);
17466     } else if ($cop$$cmpcode == Assembler::equal) {
17467       Label done;
17468       __ jccb(Assembler::parity, done);
17469       __ jccb(Assembler::equal, *l);
17470       __ bind(done);
17471     } else {
17472        ShouldNotReachHere();
17473     }
17474   %}
17475   ins_pipe(pipe_jcc);
17476   ins_short_branch(1);
17477 %}
17478 
17479 // Jump Direct Conditional - using signed and unsigned comparison
17480 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17481   match(If cop cmp);
17482   effect(USE labl);
17483 
17484   ins_cost(300);
17485   format %{ "j$cop,sus  $labl" %}
17486   size(2);
17487   ins_encode %{
17488     Label* L = $labl$$label;
17489     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17490   %}
17491   ins_pipe(pipe_jcc);
17492   ins_short_branch(1);
17493 %}
17494 
17495 // ============================================================================
17496 // inlined locking and unlocking
17497 
17498 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17499   match(Set cr (FastLock object box));
17500   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17501   ins_cost(300);
17502   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17503   ins_encode %{
17504     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17505   %}
17506   ins_pipe(pipe_slow);
17507 %}
17508 
17509 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17510   match(Set cr (FastUnlock object rax_reg));
17511   effect(TEMP tmp, USE_KILL rax_reg);
17512   ins_cost(300);
17513   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17514   ins_encode %{
17515     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17516   %}
17517   ins_pipe(pipe_slow);
17518 %}
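
// Both rules above produce their result in the flags: fast_lock()/fast_unlock()
// set the condition codes to indicate whether the inline (fast-path) lock or
// unlock succeeded, and the surrounding graph branches to the runtime
// monitorenter/monitorexit slow path when it did not.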
17519 
17520 
17521 // ============================================================================
17522 // Safepoint Instructions
17523 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17524 %{
17525   match(SafePoint poll);
17526   effect(KILL cr, USE poll);
17527 
17528   format %{ "testl   rax, [$poll]\t"
17529             "# Safepoint: poll for GC" %}
17530   ins_cost(125);
17531   ins_encode %{
17532     __ relocate(relocInfo::poll_type);
17533     address pre_pc = __ pc();
17534     __ testl(rax, Address($poll$$Register, 0));
17535     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17536   %}
17537   ins_pipe(ialu_reg_mem);
17538 %}
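
// The $poll register holds the thread-local polling page address. When a
// safepoint or handshake is pending the page is armed (made unreadable), so the
// testl load above faults and the signal handler transfers control to the
// safepoint handler; otherwise the poll is just a cheap load.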
17539 
17540 instruct mask_all_evexL(kReg dst, rRegL src) %{
17541   match(Set dst (MaskAll src));
17542   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17543   ins_encode %{
17544     int mask_len = Matcher::vector_length(this);
17545     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17546   %}
17547   ins_pipe( pipe_slow );
17548 %}
17549 
17550 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17551   predicate(Matcher::vector_length(n) > 32);
17552   match(Set dst (MaskAll src));
17553   effect(TEMP tmp);
17554   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17555   ins_encode %{
17556     int mask_len = Matcher::vector_length(this);
17557     __ movslq($tmp$$Register, $src$$Register);
17558     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17559   %}
17560   ins_pipe( pipe_slow );
17561 %}
17562 
17563 // ============================================================================
17564 // Procedure Call/Return Instructions
17565 // Call Java Static Instruction
17566 // Note: If this code changes, the corresponding ret_addr_offset() and
17567 //       compute_padding() functions will have to be adjusted.
17568 instruct CallStaticJavaDirect(method meth) %{
17569   match(CallStaticJava);
17570   effect(USE meth);
17571 
17572   ins_cost(300);
17573   format %{ "call,static " %}
17574   opcode(0xE8); /* E8 cd */
17575   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17576   ins_pipe(pipe_slow);
17577   ins_alignment(4);
17578 %}
17579 
17580 // Call Java Dynamic Instruction
17581 // Note: If this code changes, the corresponding ret_addr_offset() and
17582 //       compute_padding() functions will have to be adjusted.
17583 instruct CallDynamicJavaDirect(method meth)
17584 %{
17585   match(CallDynamicJava);
17586   effect(USE meth);
17587 
17588   ins_cost(300);
17589   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17590             "call,dynamic " %}
17591   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17592   ins_pipe(pipe_slow);
17593   ins_alignment(4);
17594 %}
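
// The movq of Universe::non_oop_word() into rax seeds the inline-cache value
// for the call site; the non-oop sentinel guarantees the first invocation
// misses the cache and goes through inline-cache resolution.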
17595 
17596 // Call Runtime Instruction
17597 instruct CallRuntimeDirect(method meth)
17598 %{
17599   match(CallRuntime);
17600   effect(USE meth);
17601 
17602   ins_cost(300);
17603   format %{ "call,runtime " %}
17604   ins_encode(clear_avx, Java_To_Runtime(meth));
17605   ins_pipe(pipe_slow);
17606 %}
17607 
17608 // Call runtime without safepoint
17609 instruct CallLeafDirect(method meth)
17610 %{
17611   match(CallLeaf);
17612   effect(USE meth);
17613 
17614   ins_cost(300);
17615   format %{ "call_leaf,runtime " %}
17616   ins_encode(clear_avx, Java_To_Runtime(meth));
17617   ins_pipe(pipe_slow);
17618 %}
17619 
17620 // Call runtime without safepoint and with vector arguments
17621 instruct CallLeafDirectVector(method meth)
17622 %{
17623   match(CallLeafVector);
17624   effect(USE meth);
17625 
17626   ins_cost(300);
17627   format %{ "call_leaf,vector " %}
17628   ins_encode(Java_To_Runtime(meth));
17629   ins_pipe(pipe_slow);
17630 %}
17631 
17632 // Call runtime without safepoint
17633 // entry point is null, target holds the address to call
17634 instruct CallLeafNoFPInDirect(rRegP target)
17635 %{
17636   predicate(n->as_Call()->entry_point() == nullptr);
17637   match(CallLeafNoFP target);
17638 
17639   ins_cost(300);
17640   format %{ "call_leaf_nofp,runtime indirect " %}
17641   ins_encode %{
17642      __ call($target$$Register);
17643   %}
17644 
17645   ins_pipe(pipe_slow);
17646 %}
17647 
17648 // Call runtime without safepoint
17649 instruct CallLeafNoFPDirect(method meth)
17650 %{
17651   predicate(n->as_Call()->entry_point() != nullptr);
17652   match(CallLeafNoFP);
17653   effect(USE meth);
17654 
17655   ins_cost(300);
17656   format %{ "call_leaf_nofp,runtime " %}
17657   ins_encode(clear_avx, Java_To_Runtime(meth));
17658   ins_pipe(pipe_slow);
17659 %}
17660 
17661 // Return Instruction
17662 // Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
17665 instruct Ret()
17666 %{
17667   match(Return);
17668 
17669   format %{ "ret" %}
17670   ins_encode %{
17671     __ ret(0);
17672   %}
17673   ins_pipe(pipe_jmp);
17674 %}
17675 
17676 // Tail Call; Jump from runtime stub to Java code.
17677 // Also known as an 'interprocedural jump'.
17678 // Target of jump will eventually return to caller.
17679 // TailJump below removes the return address.
17680 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17681 // emitted just above the TailCall which has reset rbp to the caller state.
17682 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17683 %{
17684   match(TailCall jump_target method_ptr);
17685 
17686   ins_cost(300);
17687   format %{ "jmp     $jump_target\t# rbx holds method" %}
17688   ins_encode %{
17689     __ jmp($jump_target$$Register);
17690   %}
17691   ins_pipe(pipe_jmp);
17692 %}
17693 
17694 // Tail Jump; remove the return address; jump to target.
17695 // TailCall above leaves the return address around.
17696 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17697 %{
17698   match(TailJump jump_target ex_oop);
17699 
17700   ins_cost(300);
17701   format %{ "popq    rdx\t# pop return address\n\t"
17702             "jmp     $jump_target" %}
17703   ins_encode %{
17704     __ popq(as_Register(RDX_enc));
17705     __ jmp($jump_target$$Register);
17706   %}
17707   ins_pipe(pipe_jmp);
17708 %}
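
// The popped return address serves as the exception PC: by convention the
// exception oop travels in rax and the throwing PC in rdx when control is
// transferred to the exception-handling code.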
17709 
17710 // Forward exception.
17711 instruct ForwardExceptionjmp()
17712 %{
17713   match(ForwardException);
17714 
17715   format %{ "jmp     forward_exception_stub" %}
17716   ins_encode %{
17717     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17718   %}
17719   ins_pipe(pipe_jmp);
17720 %}
17721 
17722 // Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler and is set up
// just prior to jumping to this handler.  No code emitted.
17725 instruct CreateException(rax_RegP ex_oop)
17726 %{
17727   match(Set ex_oop (CreateEx));
17728 
17729   size(0);
17730   // use the following format syntax
17731   format %{ "# exception oop is in rax; no code emitted" %}
17732   ins_encode();
17733   ins_pipe(empty);
17734 %}
17735 
17736 // Rethrow exception:
17737 // The exception oop will come in the first argument position.
17738 // Then JUMP (not call) to the rethrow stub code.
17739 instruct RethrowException()
17740 %{
17741   match(Rethrow);
17742 
17743   // use the following format syntax
17744   format %{ "jmp     rethrow_stub" %}
17745   ins_encode %{
17746     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17747   %}
17748   ins_pipe(pipe_jmp);
17749 %}
17750 
17751 // ============================================================================
17752 // This name is KNOWN by the ADLC and cannot be changed.
17753 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17754 // for this guy.
17755 instruct tlsLoadP(r15_RegP dst) %{
17756   match(Set dst (ThreadLocal));
17757   effect(DEF dst);
17758 
17759   size(0);
17760   format %{ "# TLS is in R15" %}
17761   ins_encode( /*empty encoding*/ );
17762   ins_pipe(ialu_reg_reg);
17763 %}
17764 
17765 instruct addF_reg(regF dst, regF src) %{
17766   predicate(UseAVX == 0);
17767   match(Set dst (AddF dst src));
17768 
17769   format %{ "addss   $dst, $src" %}
17770   ins_cost(150);
17771   ins_encode %{
17772     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17773   %}
17774   ins_pipe(pipe_slow);
17775 %}
17776 
17777 instruct addF_mem(regF dst, memory src) %{
17778   predicate(UseAVX == 0);
17779   match(Set dst (AddF dst (LoadF src)));
17780 
17781   format %{ "addss   $dst, $src" %}
17782   ins_cost(150);
17783   ins_encode %{
17784     __ addss($dst$$XMMRegister, $src$$Address);
17785   %}
17786   ins_pipe(pipe_slow);
17787 %}
17788 
17789 instruct addF_imm(regF dst, immF con) %{
17790   predicate(UseAVX == 0);
17791   match(Set dst (AddF dst con));
17792   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17793   ins_cost(150);
17794   ins_encode %{
17795     __ addss($dst$$XMMRegister, $constantaddress($con));
17796   %}
17797   ins_pipe(pipe_slow);
17798 %}
17799 
17800 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17801   predicate(UseAVX > 0);
17802   match(Set dst (AddF src1 src2));
17803 
17804   format %{ "vaddss  $dst, $src1, $src2" %}
17805   ins_cost(150);
17806   ins_encode %{
17807     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17808   %}
17809   ins_pipe(pipe_slow);
17810 %}
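
// Note on the scalar FP arithmetic rules in this block: the UseAVX == 0 forms
// use the destructive two-operand SSE encodings (dst op= src), while the
// UseAVX > 0 forms use the non-destructive three-operand VEX encodings, which
// avoid a register-to-register move when the result differs from the first
// input.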
17811 
17812 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17813   predicate(UseAVX > 0);
17814   match(Set dst (AddF src1 (LoadF src2)));
17815 
17816   format %{ "vaddss  $dst, $src1, $src2" %}
17817   ins_cost(150);
17818   ins_encode %{
17819     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17820   %}
17821   ins_pipe(pipe_slow);
17822 %}
17823 
17824 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17825   predicate(UseAVX > 0);
17826   match(Set dst (AddF src con));
17827 
17828   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17829   ins_cost(150);
17830   ins_encode %{
17831     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17832   %}
17833   ins_pipe(pipe_slow);
17834 %}
17835 
17836 instruct addD_reg(regD dst, regD src) %{
17837   predicate(UseAVX == 0);
17838   match(Set dst (AddD dst src));
17839 
17840   format %{ "addsd   $dst, $src" %}
17841   ins_cost(150);
17842   ins_encode %{
17843     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17844   %}
17845   ins_pipe(pipe_slow);
17846 %}
17847 
17848 instruct addD_mem(regD dst, memory src) %{
17849   predicate(UseAVX == 0);
17850   match(Set dst (AddD dst (LoadD src)));
17851 
17852   format %{ "addsd   $dst, $src" %}
17853   ins_cost(150);
17854   ins_encode %{
17855     __ addsd($dst$$XMMRegister, $src$$Address);
17856   %}
17857   ins_pipe(pipe_slow);
17858 %}
17859 
17860 instruct addD_imm(regD dst, immD con) %{
17861   predicate(UseAVX == 0);
17862   match(Set dst (AddD dst con));
17863   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17864   ins_cost(150);
17865   ins_encode %{
17866     __ addsd($dst$$XMMRegister, $constantaddress($con));
17867   %}
17868   ins_pipe(pipe_slow);
17869 %}
17870 
17871 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17872   predicate(UseAVX > 0);
17873   match(Set dst (AddD src1 src2));
17874 
17875   format %{ "vaddsd  $dst, $src1, $src2" %}
17876   ins_cost(150);
17877   ins_encode %{
17878     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17879   %}
17880   ins_pipe(pipe_slow);
17881 %}
17882 
17883 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17884   predicate(UseAVX > 0);
17885   match(Set dst (AddD src1 (LoadD src2)));
17886 
17887   format %{ "vaddsd  $dst, $src1, $src2" %}
17888   ins_cost(150);
17889   ins_encode %{
17890     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17891   %}
17892   ins_pipe(pipe_slow);
17893 %}
17894 
17895 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17896   predicate(UseAVX > 0);
17897   match(Set dst (AddD src con));
17898 
17899   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17900   ins_cost(150);
17901   ins_encode %{
17902     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17903   %}
17904   ins_pipe(pipe_slow);
17905 %}
17906 
17907 instruct subF_reg(regF dst, regF src) %{
17908   predicate(UseAVX == 0);
17909   match(Set dst (SubF dst src));
17910 
17911   format %{ "subss   $dst, $src" %}
17912   ins_cost(150);
17913   ins_encode %{
17914     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17915   %}
17916   ins_pipe(pipe_slow);
17917 %}
17918 
17919 instruct subF_mem(regF dst, memory src) %{
17920   predicate(UseAVX == 0);
17921   match(Set dst (SubF dst (LoadF src)));
17922 
17923   format %{ "subss   $dst, $src" %}
17924   ins_cost(150);
17925   ins_encode %{
17926     __ subss($dst$$XMMRegister, $src$$Address);
17927   %}
17928   ins_pipe(pipe_slow);
17929 %}
17930 
17931 instruct subF_imm(regF dst, immF con) %{
17932   predicate(UseAVX == 0);
17933   match(Set dst (SubF dst con));
17934   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17935   ins_cost(150);
17936   ins_encode %{
17937     __ subss($dst$$XMMRegister, $constantaddress($con));
17938   %}
17939   ins_pipe(pipe_slow);
17940 %}
17941 
17942 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17943   predicate(UseAVX > 0);
17944   match(Set dst (SubF src1 src2));
17945 
17946   format %{ "vsubss  $dst, $src1, $src2" %}
17947   ins_cost(150);
17948   ins_encode %{
17949     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17950   %}
17951   ins_pipe(pipe_slow);
17952 %}
17953 
17954 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17955   predicate(UseAVX > 0);
17956   match(Set dst (SubF src1 (LoadF src2)));
17957 
17958   format %{ "vsubss  $dst, $src1, $src2" %}
17959   ins_cost(150);
17960   ins_encode %{
17961     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17962   %}
17963   ins_pipe(pipe_slow);
17964 %}
17965 
17966 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17967   predicate(UseAVX > 0);
17968   match(Set dst (SubF src con));
17969 
17970   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17971   ins_cost(150);
17972   ins_encode %{
17973     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17974   %}
17975   ins_pipe(pipe_slow);
17976 %}
17977 
17978 instruct subD_reg(regD dst, regD src) %{
17979   predicate(UseAVX == 0);
17980   match(Set dst (SubD dst src));
17981 
17982   format %{ "subsd   $dst, $src" %}
17983   ins_cost(150);
17984   ins_encode %{
17985     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17986   %}
17987   ins_pipe(pipe_slow);
17988 %}
17989 
17990 instruct subD_mem(regD dst, memory src) %{
17991   predicate(UseAVX == 0);
17992   match(Set dst (SubD dst (LoadD src)));
17993 
17994   format %{ "subsd   $dst, $src" %}
17995   ins_cost(150);
17996   ins_encode %{
17997     __ subsd($dst$$XMMRegister, $src$$Address);
17998   %}
17999   ins_pipe(pipe_slow);
18000 %}
18001 
18002 instruct subD_imm(regD dst, immD con) %{
18003   predicate(UseAVX == 0);
18004   match(Set dst (SubD dst con));
18005   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18006   ins_cost(150);
18007   ins_encode %{
18008     __ subsd($dst$$XMMRegister, $constantaddress($con));
18009   %}
18010   ins_pipe(pipe_slow);
18011 %}
18012 
18013 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
18014   predicate(UseAVX > 0);
18015   match(Set dst (SubD src1 src2));
18016 
18017   format %{ "vsubsd  $dst, $src1, $src2" %}
18018   ins_cost(150);
18019   ins_encode %{
18020     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18021   %}
18022   ins_pipe(pipe_slow);
18023 %}
18024 
18025 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
18026   predicate(UseAVX > 0);
18027   match(Set dst (SubD src1 (LoadD src2)));
18028 
18029   format %{ "vsubsd  $dst, $src1, $src2" %}
18030   ins_cost(150);
18031   ins_encode %{
18032     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18033   %}
18034   ins_pipe(pipe_slow);
18035 %}
18036 
18037 instruct subD_reg_imm(regD dst, regD src, immD con) %{
18038   predicate(UseAVX > 0);
18039   match(Set dst (SubD src con));
18040 
18041   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18042   ins_cost(150);
18043   ins_encode %{
18044     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18045   %}
18046   ins_pipe(pipe_slow);
18047 %}
18048 
18049 instruct mulF_reg(regF dst, regF src) %{
18050   predicate(UseAVX == 0);
18051   match(Set dst (MulF dst src));
18052 
18053   format %{ "mulss   $dst, $src" %}
18054   ins_cost(150);
18055   ins_encode %{
18056     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
18057   %}
18058   ins_pipe(pipe_slow);
18059 %}
18060 
18061 instruct mulF_mem(regF dst, memory src) %{
18062   predicate(UseAVX == 0);
18063   match(Set dst (MulF dst (LoadF src)));
18064 
18065   format %{ "mulss   $dst, $src" %}
18066   ins_cost(150);
18067   ins_encode %{
18068     __ mulss($dst$$XMMRegister, $src$$Address);
18069   %}
18070   ins_pipe(pipe_slow);
18071 %}
18072 
18073 instruct mulF_imm(regF dst, immF con) %{
18074   predicate(UseAVX == 0);
18075   match(Set dst (MulF dst con));
18076   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18077   ins_cost(150);
18078   ins_encode %{
18079     __ mulss($dst$$XMMRegister, $constantaddress($con));
18080   %}
18081   ins_pipe(pipe_slow);
18082 %}
18083 
18084 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
18085   predicate(UseAVX > 0);
18086   match(Set dst (MulF src1 src2));
18087 
18088   format %{ "vmulss  $dst, $src1, $src2" %}
18089   ins_cost(150);
18090   ins_encode %{
18091     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18092   %}
18093   ins_pipe(pipe_slow);
18094 %}
18095 
18096 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
18097   predicate(UseAVX > 0);
18098   match(Set dst (MulF src1 (LoadF src2)));
18099 
18100   format %{ "vmulss  $dst, $src1, $src2" %}
18101   ins_cost(150);
18102   ins_encode %{
18103     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18104   %}
18105   ins_pipe(pipe_slow);
18106 %}
18107 
18108 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
18109   predicate(UseAVX > 0);
18110   match(Set dst (MulF src con));
18111 
18112   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18113   ins_cost(150);
18114   ins_encode %{
18115     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18116   %}
18117   ins_pipe(pipe_slow);
18118 %}
18119 
18120 instruct mulD_reg(regD dst, regD src) %{
18121   predicate(UseAVX == 0);
18122   match(Set dst (MulD dst src));
18123 
18124   format %{ "mulsd   $dst, $src" %}
18125   ins_cost(150);
18126   ins_encode %{
18127     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
18128   %}
18129   ins_pipe(pipe_slow);
18130 %}
18131 
18132 instruct mulD_mem(regD dst, memory src) %{
18133   predicate(UseAVX == 0);
18134   match(Set dst (MulD dst (LoadD src)));
18135 
18136   format %{ "mulsd   $dst, $src" %}
18137   ins_cost(150);
18138   ins_encode %{
18139     __ mulsd($dst$$XMMRegister, $src$$Address);
18140   %}
18141   ins_pipe(pipe_slow);
18142 %}
18143 
18144 instruct mulD_imm(regD dst, immD con) %{
18145   predicate(UseAVX == 0);
18146   match(Set dst (MulD dst con));
18147   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18148   ins_cost(150);
18149   ins_encode %{
18150     __ mulsd($dst$$XMMRegister, $constantaddress($con));
18151   %}
18152   ins_pipe(pipe_slow);
18153 %}
18154 
18155 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
18156   predicate(UseAVX > 0);
18157   match(Set dst (MulD src1 src2));
18158 
18159   format %{ "vmulsd  $dst, $src1, $src2" %}
18160   ins_cost(150);
18161   ins_encode %{
18162     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18163   %}
18164   ins_pipe(pipe_slow);
18165 %}
18166 
18167 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18168   predicate(UseAVX > 0);
18169   match(Set dst (MulD src1 (LoadD src2)));
18170 
18171   format %{ "vmulsd  $dst, $src1, $src2" %}
18172   ins_cost(150);
18173   ins_encode %{
18174     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18175   %}
18176   ins_pipe(pipe_slow);
18177 %}
18178 
18179 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18180   predicate(UseAVX > 0);
18181   match(Set dst (MulD src con));
18182 
18183   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18184   ins_cost(150);
18185   ins_encode %{
18186     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18187   %}
18188   ins_pipe(pipe_slow);
18189 %}
18190 
18191 instruct divF_reg(regF dst, regF src) %{
18192   predicate(UseAVX == 0);
18193   match(Set dst (DivF dst src));
18194 
18195   format %{ "divss   $dst, $src" %}
18196   ins_cost(150);
18197   ins_encode %{
18198     __ divss($dst$$XMMRegister, $src$$XMMRegister);
18199   %}
18200   ins_pipe(pipe_slow);
18201 %}
18202 
18203 instruct divF_mem(regF dst, memory src) %{
18204   predicate(UseAVX == 0);
18205   match(Set dst (DivF dst (LoadF src)));
18206 
18207   format %{ "divss   $dst, $src" %}
18208   ins_cost(150);
18209   ins_encode %{
18210     __ divss($dst$$XMMRegister, $src$$Address);
18211   %}
18212   ins_pipe(pipe_slow);
18213 %}
18214 
18215 instruct divF_imm(regF dst, immF con) %{
18216   predicate(UseAVX == 0);
18217   match(Set dst (DivF dst con));
18218   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18219   ins_cost(150);
18220   ins_encode %{
18221     __ divss($dst$$XMMRegister, $constantaddress($con));
18222   %}
18223   ins_pipe(pipe_slow);
18224 %}
18225 
18226 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18227   predicate(UseAVX > 0);
18228   match(Set dst (DivF src1 src2));
18229 
18230   format %{ "vdivss  $dst, $src1, $src2" %}
18231   ins_cost(150);
18232   ins_encode %{
18233     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18234   %}
18235   ins_pipe(pipe_slow);
18236 %}
18237 
18238 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18239   predicate(UseAVX > 0);
18240   match(Set dst (DivF src1 (LoadF src2)));
18241 
18242   format %{ "vdivss  $dst, $src1, $src2" %}
18243   ins_cost(150);
18244   ins_encode %{
18245     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18246   %}
18247   ins_pipe(pipe_slow);
18248 %}
18249 
18250 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18251   predicate(UseAVX > 0);
18252   match(Set dst (DivF src con));
18253 
18254   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18255   ins_cost(150);
18256   ins_encode %{
18257     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18258   %}
18259   ins_pipe(pipe_slow);
18260 %}
18261 
18262 instruct divD_reg(regD dst, regD src) %{
18263   predicate(UseAVX == 0);
18264   match(Set dst (DivD dst src));
18265 
18266   format %{ "divsd   $dst, $src" %}
18267   ins_cost(150);
18268   ins_encode %{
18269     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18270   %}
18271   ins_pipe(pipe_slow);
18272 %}
18273 
18274 instruct divD_mem(regD dst, memory src) %{
18275   predicate(UseAVX == 0);
18276   match(Set dst (DivD dst (LoadD src)));
18277 
18278   format %{ "divsd   $dst, $src" %}
18279   ins_cost(150);
18280   ins_encode %{
18281     __ divsd($dst$$XMMRegister, $src$$Address);
18282   %}
18283   ins_pipe(pipe_slow);
18284 %}
18285 
18286 instruct divD_imm(regD dst, immD con) %{
18287   predicate(UseAVX == 0);
18288   match(Set dst (DivD dst con));
18289   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18290   ins_cost(150);
18291   ins_encode %{
18292     __ divsd($dst$$XMMRegister, $constantaddress($con));
18293   %}
18294   ins_pipe(pipe_slow);
18295 %}
18296 
18297 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18298   predicate(UseAVX > 0);
18299   match(Set dst (DivD src1 src2));
18300 
18301   format %{ "vdivsd  $dst, $src1, $src2" %}
18302   ins_cost(150);
18303   ins_encode %{
18304     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18305   %}
18306   ins_pipe(pipe_slow);
18307 %}
18308 
18309 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18310   predicate(UseAVX > 0);
18311   match(Set dst (DivD src1 (LoadD src2)));
18312 
18313   format %{ "vdivsd  $dst, $src1, $src2" %}
18314   ins_cost(150);
18315   ins_encode %{
18316     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18317   %}
18318   ins_pipe(pipe_slow);
18319 %}
18320 
18321 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18322   predicate(UseAVX > 0);
18323   match(Set dst (DivD src con));
18324 
18325   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18326   ins_cost(150);
18327   ins_encode %{
18328     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18329   %}
18330   ins_pipe(pipe_slow);
18331 %}
18332 
18333 instruct absF_reg(regF dst) %{
18334   predicate(UseAVX == 0);
18335   match(Set dst (AbsF dst));
18336   ins_cost(150);
18337   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18338   ins_encode %{
18339     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18340   %}
18341   ins_pipe(pipe_slow);
18342 %}
18343 
18344 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18345   predicate(UseAVX > 0);
18346   match(Set dst (AbsF src));
18347   ins_cost(150);
18348   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18349   ins_encode %{
18350     int vlen_enc = Assembler::AVX_128bit;
18351     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18352               ExternalAddress(float_signmask()), vlen_enc);
18353   %}
18354   ins_pipe(pipe_slow);
18355 %}
18356 
18357 instruct absD_reg(regD dst) %{
18358   predicate(UseAVX == 0);
18359   match(Set dst (AbsD dst));
18360   ins_cost(150);
18361   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18362             "# abs double by sign masking" %}
18363   ins_encode %{
18364     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18365   %}
18366   ins_pipe(pipe_slow);
18367 %}
18368 
18369 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18370   predicate(UseAVX > 0);
18371   match(Set dst (AbsD src));
18372   ins_cost(150);
18373   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18374             "# abs double by sign masking" %}
18375   ins_encode %{
18376     int vlen_enc = Assembler::AVX_128bit;
18377     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18378               ExternalAddress(double_signmask()), vlen_enc);
18379   %}
18380   ins_pipe(pipe_slow);
18381 %}
18382 
18383 instruct negF_reg(regF dst) %{
18384   predicate(UseAVX == 0);
18385   match(Set dst (NegF dst));
18386   ins_cost(150);
18387   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18388   ins_encode %{
18389     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18390   %}
18391   ins_pipe(pipe_slow);
18392 %}
18393 
18394 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18395   predicate(UseAVX > 0);
18396   match(Set dst (NegF src));
18397   ins_cost(150);
18398   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18399   ins_encode %{
18400     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18401                  ExternalAddress(float_signflip()));
18402   %}
18403   ins_pipe(pipe_slow);
18404 %}
18405 
18406 instruct negD_reg(regD dst) %{
18407   predicate(UseAVX == 0);
18408   match(Set dst (NegD dst));
18409   ins_cost(150);
18410   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18411             "# neg double by sign flipping" %}
18412   ins_encode %{
18413     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18414   %}
18415   ins_pipe(pipe_slow);
18416 %}
18417 
18418 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18419   predicate(UseAVX > 0);
18420   match(Set dst (NegD src));
18421   ins_cost(150);
18422   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18423             "# neg double by sign flipping" %}
18424   ins_encode %{
18425     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18426                  ExternalAddress(double_signflip()));
18427   %}
18428   ins_pipe(pipe_slow);
18429 %}
18430 
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18433 instruct sqrtF_reg(regF dst) %{
18434   match(Set dst (SqrtF dst));
18435   format %{ "sqrtss  $dst, $dst" %}
18436   ins_encode %{
18437     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18438   %}
18439   ins_pipe(pipe_slow);
18440 %}
18441 
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18444 instruct sqrtD_reg(regD dst) %{
18445   match(Set dst (SqrtD dst));
18446   format %{ "sqrtsd  $dst, $dst" %}
18447   ins_encode %{
18448     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18449   %}
18450   ins_pipe(pipe_slow);
18451 %}
18452 
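// Half-precision conversions.  ConvF2HF/ConvHF2F convert a single value with
// vcvtps2ph/vcvtph2ps (via flt_to_flt16/flt16_to_flt); the memory-store form
// uses a one-bit opmask so only the low converted element is written.  The
// vector forms below convert whole vectors with the same instructions.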
18453 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18454   effect(TEMP tmp);
18455   match(Set dst (ConvF2HF src));
18456   ins_cost(125);
18457   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18458   ins_encode %{
18459     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18460   %}
18461   ins_pipe( pipe_slow );
18462 %}
18463 
18464 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18465   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18466   effect(TEMP ktmp, TEMP rtmp);
18467   match(Set mem (StoreC mem (ConvF2HF src)));
18468   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18469   ins_encode %{
18470     __ movl($rtmp$$Register, 0x1);
18471     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18472     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18473   %}
18474   ins_pipe( pipe_slow );
18475 %}
18476 
18477 instruct vconvF2HF(vec dst, vec src) %{
18478   match(Set dst (VectorCastF2HF src));
18479   format %{ "vector_conv_F2HF $dst $src" %}
18480   ins_encode %{
18481     int vlen_enc = vector_length_encoding(this, $src);
18482     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18483   %}
18484   ins_pipe( pipe_slow );
18485 %}
18486 
18487 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18488   predicate(n->as_StoreVector()->memory_size() >= 16);
18489   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18490   format %{ "vcvtps2ph $mem,$src" %}
18491   ins_encode %{
18492     int vlen_enc = vector_length_encoding(this, $src);
18493     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18494   %}
18495   ins_pipe( pipe_slow );
18496 %}
18497 
18498 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18499   match(Set dst (ConvHF2F src));
18500   format %{ "vcvtph2ps $dst,$src" %}
18501   ins_encode %{
18502     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18503   %}
18504   ins_pipe( pipe_slow );
18505 %}
18506 
18507 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18508   match(Set dst (VectorCastHF2F (LoadVector mem)));
18509   format %{ "vcvtph2ps $dst,$mem" %}
18510   ins_encode %{
18511     int vlen_enc = vector_length_encoding(this);
18512     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18513   %}
18514   ins_pipe( pipe_slow );
18515 %}
18516 
18517 instruct vconvHF2F(vec dst, vec src) %{
18518   match(Set dst (VectorCastHF2F src));
18519   ins_cost(125);
18520   format %{ "vector_conv_HF2F $dst,$src" %}
18521   ins_encode %{
18522     int vlen_enc = vector_length_encoding(this);
18523     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18524   %}
18525   ins_pipe( pipe_slow );
18526 %}
18527 
18528 // ---------------------------------------- VectorReinterpret ------------------------------------
18529 instruct reinterpret_mask(kReg dst) %{
18530   predicate(n->bottom_type()->isa_vectmask() &&
18531             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18532   match(Set dst (VectorReinterpret dst));
18533   ins_cost(125);
18534   format %{ "vector_reinterpret $dst\t!" %}
18535   ins_encode %{
18536     // empty
18537   %}
18538   ins_pipe( pipe_slow );
18539 %}
18540 
18541 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18542   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18543             n->bottom_type()->isa_vectmask() &&
18544             n->in(1)->bottom_type()->isa_vectmask() &&
18545             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18547   match(Set dst (VectorReinterpret src));
18548   effect(TEMP xtmp);
18549   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18550   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18557   %}
18558   ins_pipe( pipe_slow );
18559 %}
18560 
18561 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18562   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18563             n->bottom_type()->isa_vectmask() &&
18564             n->in(1)->bottom_type()->isa_vectmask() &&
18565             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18566              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18568   match(Set dst (VectorReinterpret src));
18569   effect(TEMP xtmp);
18570   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18571   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18578   %}
18579   ins_pipe( pipe_slow );
18580 %}
18581 
18582 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18583   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18584             n->bottom_type()->isa_vectmask() &&
18585             n->in(1)->bottom_type()->isa_vectmask() &&
18586             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18587              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18589   match(Set dst (VectorReinterpret src));
18590   effect(TEMP xtmp);
18591   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18592   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18599   %}
18600   ins_pipe( pipe_slow );
18601 %}
18602 
18603 instruct reinterpret(vec dst) %{
18604   predicate(!n->bottom_type()->isa_vectmask() &&
18605             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18606   match(Set dst (VectorReinterpret dst));
18607   ins_cost(125);
18608   format %{ "vector_reinterpret $dst\t!" %}
18609   ins_encode %{
18610     // empty
18611   %}
18612   ins_pipe( pipe_slow );
18613 %}
18614 
18615 instruct reinterpret_expand(vec dst, vec src) %{
18616   predicate(UseAVX == 0 &&
18617             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18618   match(Set dst (VectorReinterpret src));
18619   ins_cost(125);
18620   effect(TEMP dst);
18621   format %{ "vector_reinterpret_expand $dst,$src" %}
18622   ins_encode %{
18623     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18624     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18625 
18626     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18627     if (src_vlen_in_bytes == 4) {
18628       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18629     } else {
18630       assert(src_vlen_in_bytes == 8, "");
18631       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18632     }
18633     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18634   %}
18635   ins_pipe( pipe_slow );
18636 %}
18637 
18638 instruct vreinterpret_expand4(legVec dst, vec src) %{
18639   predicate(UseAVX > 0 &&
18640             !n->bottom_type()->isa_vectmask() &&
18641             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18642             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18643   match(Set dst (VectorReinterpret src));
18644   ins_cost(125);
18645   format %{ "vector_reinterpret_expand $dst,$src" %}
18646   ins_encode %{
18647     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18648   %}
18649   ins_pipe( pipe_slow );
18650 %}
18651 
18652 
18653 instruct vreinterpret_expand(legVec dst, vec src) %{
18654   predicate(UseAVX > 0 &&
18655             !n->bottom_type()->isa_vectmask() &&
18656             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18657             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18658   match(Set dst (VectorReinterpret src));
18659   ins_cost(125);
18660   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18661   ins_encode %{
18662     switch (Matcher::vector_length_in_bytes(this, $src)) {
18663       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18664       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18665       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18666       default: ShouldNotReachHere();
18667     }
18668   %}
18669   ins_pipe( pipe_slow );
18670 %}
18671 
18672 instruct reinterpret_shrink(vec dst, legVec src) %{
18673   predicate(!n->bottom_type()->isa_vectmask() &&
18674             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18675   match(Set dst (VectorReinterpret src));
18676   ins_cost(125);
18677   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18678   ins_encode %{
18679     switch (Matcher::vector_length_in_bytes(this)) {
18680       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18681       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18682       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18683       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18684       default: ShouldNotReachHere();
18685     }
18686   %}
18687   ins_pipe( pipe_slow );
18688 %}
18689 
18690 // ----------------------------------------------------------------------------------------------------
18691 
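// RoundDoubleMode/RoundDoubleModeV pass $rmode straight through as the x86
// rounding-control immediate for roundsd/vroundpd/vrndscalepd
// (0 = round to nearest even, 1 = round toward -infinity, 2 = round toward +infinity).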
18692 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18693   match(Set dst (RoundDoubleMode src rmode));
18694   format %{ "roundsd $dst,$src" %}
18695   ins_cost(150);
18696   ins_encode %{
18697     assert(UseSSE >= 4, "required");
18698     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18699       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18700     }
18701     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18702   %}
18703   ins_pipe(pipe_slow);
18704 %}
18705 
18706 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18707   match(Set dst (RoundDoubleMode con rmode));
18708   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18709   ins_cost(150);
18710   ins_encode %{
18711     assert(UseSSE >= 4, "required");
18712     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18713   %}
18714   ins_pipe(pipe_slow);
18715 %}
18716 
18717 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18718   predicate(Matcher::vector_length(n) < 8);
18719   match(Set dst (RoundDoubleModeV src rmode));
18720   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18721   ins_encode %{
18722     assert(UseAVX > 0, "required");
18723     int vlen_enc = vector_length_encoding(this);
18724     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18725   %}
18726   ins_pipe( pipe_slow );
18727 %}
18728 
18729 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18730   predicate(Matcher::vector_length(n) == 8);
18731   match(Set dst (RoundDoubleModeV src rmode));
18732   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18733   ins_encode %{
18734     assert(UseAVX > 2, "required");
18735     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18736   %}
18737   ins_pipe( pipe_slow );
18738 %}
18739 
18740 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18741   predicate(Matcher::vector_length(n) < 8);
18742   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18743   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18744   ins_encode %{
18745     assert(UseAVX > 0, "required");
18746     int vlen_enc = vector_length_encoding(this);
18747     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18748   %}
18749   ins_pipe( pipe_slow );
18750 %}
18751 
18752 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18753   predicate(Matcher::vector_length(n) == 8);
18754   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18755   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18756   ins_encode %{
18757     assert(UseAVX > 2, "required");
18758     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18759   %}
18760   ins_pipe( pipe_slow );
18761 %}
18762 
18763 instruct onspinwait() %{
18764   match(OnSpinWait);
18765   ins_cost(200);
18766 
18767   format %{
18768     $$template
18769     $$emit$$"pause\t! membar_onspinwait"
18770   %}
18771   ins_encode %{
18772     __ pause();
18773   %}
18774   ins_pipe(pipe_slow);
18775 %}
18776 
18777 // a * b + c
18778 instruct fmaD_reg(regD a, regD b, regD c) %{
18779   match(Set c (FmaD  c (Binary a b)));
18780   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18781   ins_cost(150);
18782   ins_encode %{
18783     assert(UseFMA, "Needs FMA instructions support.");
18784     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18785   %}
18786   ins_pipe( pipe_slow );
18787 %}
18788 
18789 // a * b + c
18790 instruct fmaF_reg(regF a, regF b, regF c) %{
18791   match(Set c (FmaF  c (Binary a b)));
18792   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18793   ins_cost(150);
18794   ins_encode %{
18795     assert(UseFMA, "Needs FMA instructions support.");
18796     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18797   %}
18798   ins_pipe( pipe_slow );
18799 %}
18800 
18801 // ====================VECTOR INSTRUCTIONS=====================================
18802 
18803 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18804 instruct MoveVec2Leg(legVec dst, vec src) %{
18805   match(Set dst src);
18806   format %{ "" %}
18807   ins_encode %{
18808     ShouldNotReachHere();
18809   %}
18810   ins_pipe( fpu_reg_reg );
18811 %}
18812 
18813 instruct MoveLeg2Vec(vec dst, legVec src) %{
18814   match(Set dst src);
18815   format %{ "" %}
18816   ins_encode %{
18817     ShouldNotReachHere();
18818   %}
18819   ins_pipe( fpu_reg_reg );
18820 %}
18821 
18822 // ============================================================================
18823 
18824 // Load vectors generic operand pattern
18825 instruct loadV(vec dst, memory mem) %{
18826   match(Set dst (LoadVector mem));
18827   ins_cost(125);
18828   format %{ "load_vector $dst,$mem" %}
18829   ins_encode %{
18830     BasicType bt = Matcher::vector_element_basic_type(this);
18831     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18832   %}
18833   ins_pipe( pipe_slow );
18834 %}
18835 
18836 // Store vectors generic operand pattern.
18837 instruct storeV(memory mem, vec src) %{
18838   match(Set mem (StoreVector mem src));
18839   ins_cost(145);
18840   format %{ "store_vector $mem,$src\n\t" %}
18841   ins_encode %{
18842     switch (Matcher::vector_length_in_bytes(this, $src)) {
18843       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18844       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18845       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18846       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18847       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18848       default: ShouldNotReachHere();
18849     }
18850   %}
18851   ins_pipe( pipe_slow );
18852 %}
18853 
18854 // ---------------------------------------- Gather ------------------------------------
18855 
18856 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18857 
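// INT/LONG/FLOAT/DOUBLE gathers use the hardware gather instructions: the
// AVX2 form builds an all-ones vector mask with vpcmpeqd, the AVX-512 form
// uses an opmask register.  The memory operand is first materialized into a
// plain base register with lea.  BYTE/SHORT gathers have no hardware support
// and are emulated element by element (vgather8b / vgather_subword).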
18858 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18859   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18860             Matcher::vector_length_in_bytes(n) <= 32);
18861   match(Set dst (LoadVectorGather mem idx));
18862   effect(TEMP dst, TEMP tmp, TEMP mask);
18863   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18864   ins_encode %{
18865     int vlen_enc = vector_length_encoding(this);
18866     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18867     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18868     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18869     __ lea($tmp$$Register, $mem$$Address);
18870     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18871   %}
18872   ins_pipe( pipe_slow );
18873 %}
18874 
18875 
18876 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18877   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18878             !is_subword_type(Matcher::vector_element_basic_type(n)));
18879   match(Set dst (LoadVectorGather mem idx));
18880   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18881   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18882   ins_encode %{
18883     int vlen_enc = vector_length_encoding(this);
18884     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18885     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18886     __ lea($tmp$$Register, $mem$$Address);
18887     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18888   %}
18889   ins_pipe( pipe_slow );
18890 %}
18891 
18892 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18893   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18894             !is_subword_type(Matcher::vector_element_basic_type(n)));
18895   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18896   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18897   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18898   ins_encode %{
18899     assert(UseAVX > 2, "sanity");
18900     int vlen_enc = vector_length_encoding(this);
18901     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18902     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register used
    // for predication, the mask operand is moved to a temporary.
18905     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18906     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18907     __ lea($tmp$$Register, $mem$$Address);
18908     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18909   %}
18910   ins_pipe( pipe_slow );
18911 %}
18912 
18913 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18914   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18915   match(Set dst (LoadVectorGather mem idx_base));
18916   effect(TEMP tmp, TEMP rtmp);
18917   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18918   ins_encode %{
18919     int vlen_enc = vector_length_encoding(this);
18920     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18921     __ lea($tmp$$Register, $mem$$Address);
18922     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18923   %}
18924   ins_pipe( pipe_slow );
18925 %}
18926 
18927 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18928                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18929   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18930   match(Set dst (LoadVectorGather mem idx_base));
18931   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18932   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18933   ins_encode %{
18934     int vlen_enc = vector_length_encoding(this);
18935     int vector_len = Matcher::vector_length(this);
18936     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18937     __ lea($tmp$$Register, $mem$$Address);
18938     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18939     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18940                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18941   %}
18942   ins_pipe( pipe_slow );
18943 %}
18944 
18945 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18946   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18947   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18948   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18949   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18950   ins_encode %{
18951     int vlen_enc = vector_length_encoding(this);
18952     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18953     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18954     __ lea($tmp$$Register, $mem$$Address);
18955     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18956     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18957   %}
18958   ins_pipe( pipe_slow );
18959 %}
18960 
18961 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18962                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18963   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18964   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18965   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18966   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18967   ins_encode %{
18968     int vlen_enc = vector_length_encoding(this);
18969     int vector_len = Matcher::vector_length(this);
18970     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18971     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18972     __ lea($tmp$$Register, $mem$$Address);
18973     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18974     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18975     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18976                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18977   %}
18978   ins_pipe( pipe_slow );
18979 %}
18980 
18981 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18982   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18983   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18984   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18985   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18986   ins_encode %{
18987     int vlen_enc = vector_length_encoding(this);
18988     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18989     __ lea($tmp$$Register, $mem$$Address);
18990     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
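    // vpmovmskb produces one mask bit per byte; for short elements keep only
    // every other bit (pext with 0x55555555) so there is one bit per element.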
18991     if (elem_bt == T_SHORT) {
18992       __ movl($mask_idx$$Register, 0x55555555);
18993       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18994     }
18995     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18996     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18997   %}
18998   ins_pipe( pipe_slow );
18999 %}
19000 
19001 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
19002                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
19003   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
19004   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
19005   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
19006   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
19007   ins_encode %{
19008     int vlen_enc = vector_length_encoding(this);
19009     int vector_len = Matcher::vector_length(this);
19010     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19011     __ lea($tmp$$Register, $mem$$Address);
19012     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
19013     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
19014     if (elem_bt == T_SHORT) {
19015       __ movl($mask_idx$$Register, 0x55555555);
19016       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
19017     }
19018     __ xorl($mask_idx$$Register, $mask_idx$$Register);
19019     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
19020                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
19021   %}
19022   ins_pipe( pipe_slow );
19023 %}
19024 
19025 // ====================Scatter=======================================
19026 
19027 // Scatter INT, LONG, FLOAT, DOUBLE
19028 
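// Scatter requires AVX-512 (evscatter).  The unmasked form loads an all-ones
// opmask from the constant table; the masked form copies the mask into a
// temporary opmask register because the instruction clears completed lanes
// from the mask it is given.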
19029 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
19030   predicate(UseAVX > 2);
19031   match(Set mem (StoreVectorScatter mem (Binary src idx)));
19032   effect(TEMP tmp, TEMP ktmp);
19033   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
19034   ins_encode %{
19035     int vlen_enc = vector_length_encoding(this, $src);
19036     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19037 
19038     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19039     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19040 
19041     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
19042     __ lea($tmp$$Register, $mem$$Address);
19043     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19044   %}
19045   ins_pipe( pipe_slow );
19046 %}
19047 
19048 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
19049   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
19050   effect(TEMP tmp, TEMP ktmp);
19051   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
19052   ins_encode %{
19053     int vlen_enc = vector_length_encoding(this, $src);
19054     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19055     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19056     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is moved to a temporary.
19059     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
19060     __ lea($tmp$$Register, $mem$$Address);
19061     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19062   %}
19063   ins_pipe( pipe_slow );
19064 %}
19065 
19066 // ====================REPLICATE=======================================
19067 
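// Scalar-to-vector broadcasts choose the cheapest available form: an EVEX
// broadcast from a GPR (evpbroadcast*) when AVX512VL/BW covers the operand,
// an AVX2 register/memory broadcast (vpbroadcast*, vbroadcasts*) otherwise,
// falling back to movd/movq plus shuffle/unpack sequences on plain SSE.
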
19068 // Replicate byte scalar to be vector
19069 instruct vReplB_reg(vec dst, rRegI src) %{
19070   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
19071   match(Set dst (Replicate src));
19072   format %{ "replicateB $dst,$src" %}
19073   ins_encode %{
19074     uint vlen = Matcher::vector_length(this);
19075     if (UseAVX >= 2) {
19076       int vlen_enc = vector_length_encoding(this);
19077       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19078         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
19079         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
19080       } else {
19081         __ movdl($dst$$XMMRegister, $src$$Register);
19082         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19083       }
19084     } else {
      assert(UseAVX < 2, "");
19086       __ movdl($dst$$XMMRegister, $src$$Register);
19087       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
19088       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19089       if (vlen >= 16) {
19090         assert(vlen == 16, "");
19091         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19092       }
19093     }
19094   %}
19095   ins_pipe( pipe_slow );
19096 %}
19097 
19098 instruct ReplB_mem(vec dst, memory mem) %{
19099   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
19100   match(Set dst (Replicate (LoadB mem)));
19101   format %{ "replicateB $dst,$mem" %}
19102   ins_encode %{
19103     int vlen_enc = vector_length_encoding(this);
19104     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
19105   %}
19106   ins_pipe( pipe_slow );
19107 %}
19108 
19109 // ====================ReplicateS=======================================
19110 
19111 instruct vReplS_reg(vec dst, rRegI src) %{
19112   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
19113   match(Set dst (Replicate src));
19114   format %{ "replicateS $dst,$src" %}
19115   ins_encode %{
19116     uint vlen = Matcher::vector_length(this);
19117     int vlen_enc = vector_length_encoding(this);
19118     if (UseAVX >= 2) {
19119       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19120         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
19121         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
19122       } else {
19123         __ movdl($dst$$XMMRegister, $src$$Register);
19124         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19125       }
19126     } else {
19127       assert(UseAVX < 2, "");
19128       __ movdl($dst$$XMMRegister, $src$$Register);
19129       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19130       if (vlen >= 8) {
19131         assert(vlen == 8, "");
19132         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19133       }
19134     }
19135   %}
19136   ins_pipe( pipe_slow );
19137 %}
19138 
19139 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
19140   match(Set dst (Replicate con));
19141   effect(TEMP rtmp);
19142   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
19143   ins_encode %{
19144     int vlen_enc = vector_length_encoding(this);
19145     BasicType bt = Matcher::vector_element_basic_type(this);
19146     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
19147     __ movl($rtmp$$Register, $con$$constant);
19148     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19149   %}
19150   ins_pipe( pipe_slow );
19151 %}
19152 
19153 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
19154   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
19155   match(Set dst (Replicate src));
19156   effect(TEMP rtmp);
19157   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
19158   ins_encode %{
19159     int vlen_enc = vector_length_encoding(this);
19160     __ evmovw($rtmp$$Register, $src$$XMMRegister);
19161     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19162   %}
19163   ins_pipe( pipe_slow );
19164 %}
19165 
19166 instruct ReplS_mem(vec dst, memory mem) %{
19167   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19168   match(Set dst (Replicate (LoadS mem)));
19169   format %{ "replicateS $dst,$mem" %}
19170   ins_encode %{
19171     int vlen_enc = vector_length_encoding(this);
19172     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19173   %}
19174   ins_pipe( pipe_slow );
19175 %}
19176 
19177 // ====================ReplicateI=======================================
19178 
19179 instruct ReplI_reg(vec dst, rRegI src) %{
19180   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19181   match(Set dst (Replicate src));
19182   format %{ "replicateI $dst,$src" %}
19183   ins_encode %{
19184     uint vlen = Matcher::vector_length(this);
19185     int vlen_enc = vector_length_encoding(this);
19186     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19187       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19188     } else if (VM_Version::supports_avx2()) {
19189       __ movdl($dst$$XMMRegister, $src$$Register);
19190       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19191     } else {
19192       __ movdl($dst$$XMMRegister, $src$$Register);
19193       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19194     }
19195   %}
19196   ins_pipe( pipe_slow );
19197 %}
19198 
19199 instruct ReplI_mem(vec dst, memory mem) %{
19200   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19201   match(Set dst (Replicate (LoadI mem)));
19202   format %{ "replicateI $dst,$mem" %}
19203   ins_encode %{
19204     int vlen_enc = vector_length_encoding(this);
19205     if (VM_Version::supports_avx2()) {
19206       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19207     } else if (VM_Version::supports_avx()) {
19208       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19209     } else {
19210       __ movdl($dst$$XMMRegister, $mem$$Address);
19211       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19212     }
19213   %}
19214   ins_pipe( pipe_slow );
19215 %}
19216 
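// Replicate integral (non-long) scalar immediate to be vector by loading from const table.
// The constant table entry holds the immediate replicated into the smallest
// block the loader can broadcast: 4 bytes with AVX, 8 bytes with SSE3 (movddup),
// 16 bytes otherwise; the last argument of vreplicate_imm is that block size in elements.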
19217 instruct ReplI_imm(vec dst, immI con) %{
19218   predicate(Matcher::is_non_long_integral_vector(n));
19219   match(Set dst (Replicate con));
19220   format %{ "replicateI $dst,$con" %}
19221   ins_encode %{
19222     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19223                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19224                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
19225     BasicType bt = Matcher::vector_element_basic_type(this);
19226     int vlen = Matcher::vector_length_in_bytes(this);
19227     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19228   %}
19229   ins_pipe( pipe_slow );
19230 %}
19231 
19232 // Replicate scalar zero to be vector
19233 instruct ReplI_zero(vec dst, immI_0 zero) %{
19234   predicate(Matcher::is_non_long_integral_vector(n));
19235   match(Set dst (Replicate zero));
19236   format %{ "replicateI $dst,$zero" %}
19237   ins_encode %{
19238     int vlen_enc = vector_length_encoding(this);
19239     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19240       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19241     } else {
19242       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19243     }
19244   %}
19245   ins_pipe( fpu_reg_reg );
19246 %}
19247 
19248 instruct ReplI_M1(vec dst, immI_M1 con) %{
19249   predicate(Matcher::is_non_long_integral_vector(n));
19250   match(Set dst (Replicate con));
19251   format %{ "vallones $dst" %}
19252   ins_encode %{
19253     int vector_len = vector_length_encoding(this);
19254     __ vallones($dst$$XMMRegister, vector_len);
19255   %}
19256   ins_pipe( pipe_slow );
19257 %}
19258 
19259 // ====================ReplicateL=======================================
19260 
19261 // Replicate long (8 byte) scalar to be vector
19262 instruct ReplL_reg(vec dst, rRegL src) %{
19263   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19264   match(Set dst (Replicate src));
19265   format %{ "replicateL $dst,$src" %}
19266   ins_encode %{
19267     int vlen = Matcher::vector_length(this);
19268     int vlen_enc = vector_length_encoding(this);
19269     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19270       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19271     } else if (VM_Version::supports_avx2()) {
19272       __ movdq($dst$$XMMRegister, $src$$Register);
19273       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19274     } else {
19275       __ movdq($dst$$XMMRegister, $src$$Register);
19276       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19277     }
19278   %}
19279   ins_pipe( pipe_slow );
19280 %}
19281 
19282 instruct ReplL_mem(vec dst, memory mem) %{
19283   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19284   match(Set dst (Replicate (LoadL mem)));
19285   format %{ "replicateL $dst,$mem" %}
19286   ins_encode %{
19287     int vlen_enc = vector_length_encoding(this);
19288     if (VM_Version::supports_avx2()) {
19289       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19290     } else if (VM_Version::supports_sse3()) {
19291       __ movddup($dst$$XMMRegister, $mem$$Address);
19292     } else {
19293       __ movq($dst$$XMMRegister, $mem$$Address);
19294       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19295     }
19296   %}
19297   ins_pipe( pipe_slow );
19298 %}
19299 
19300 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19301 instruct ReplL_imm(vec dst, immL con) %{
19302   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19303   match(Set dst (Replicate con));
19304   format %{ "replicateL $dst,$con" %}
19305   ins_encode %{
19306     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19307     int vlen = Matcher::vector_length_in_bytes(this);
19308     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19309   %}
19310   ins_pipe( pipe_slow );
19311 %}
19312 
19313 instruct ReplL_zero(vec dst, immL0 zero) %{
19314   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19315   match(Set dst (Replicate zero));
19316   format %{ "replicateL $dst,$zero" %}
19317   ins_encode %{
19318     int vlen_enc = vector_length_encoding(this);
19319     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19320       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19321     } else {
19322       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19323     }
19324   %}
19325   ins_pipe( fpu_reg_reg );
19326 %}
19327 
19328 instruct ReplL_M1(vec dst, immL_M1 con) %{
19329   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19330   match(Set dst (Replicate con));
19331   format %{ "vallones $dst" %}
19332   ins_encode %{
19333     int vector_len = vector_length_encoding(this);
19334     __ vallones($dst$$XMMRegister, vector_len);
19335   %}
19336   ins_pipe( pipe_slow );
19337 %}
19338 
19339 // ====================ReplicateF=======================================
19340 
19341 instruct vReplF_reg(vec dst, vlRegF src) %{
19342   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19343   match(Set dst (Replicate src));
19344   format %{ "replicateF $dst,$src" %}
19345   ins_encode %{
19346     uint vlen = Matcher::vector_length(this);
19347     int vlen_enc = vector_length_encoding(this);
19348     if (vlen <= 4) {
19349       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19350     } else if (VM_Version::supports_avx2()) {
19351       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19352     } else {
19353       assert(vlen == 8, "sanity");
19354       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19355       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19356     }
19357   %}
19358   ins_pipe( pipe_slow );
19359 %}
19360 
19361 instruct ReplF_reg(vec dst, vlRegF src) %{
19362   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19363   match(Set dst (Replicate src));
19364   format %{ "replicateF $dst,$src" %}
19365   ins_encode %{
19366     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19367   %}
19368   ins_pipe( pipe_slow );
19369 %}
19370 
19371 instruct ReplF_mem(vec dst, memory mem) %{
19372   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19373   match(Set dst (Replicate (LoadF mem)));
19374   format %{ "replicateF $dst,$mem" %}
19375   ins_encode %{
19376     int vlen_enc = vector_length_encoding(this);
19377     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19378   %}
19379   ins_pipe( pipe_slow );
19380 %}
19381 
19382 // Replicate float scalar immediate to be vector by loading from const table.
19383 instruct ReplF_imm(vec dst, immF con) %{
19384   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19385   match(Set dst (Replicate con));
19386   format %{ "replicateF $dst,$con" %}
19387   ins_encode %{
19388     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19389                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19390     int vlen = Matcher::vector_length_in_bytes(this);
19391     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19392   %}
19393   ins_pipe( pipe_slow );
19394 %}
19395 
19396 instruct ReplF_zero(vec dst, immF0 zero) %{
19397   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19398   match(Set dst (Replicate zero));
19399   format %{ "replicateF $dst,$zero" %}
19400   ins_encode %{
19401     int vlen_enc = vector_length_encoding(this);
19402     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19403       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19404     } else {
19405       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19406     }
19407   %}
19408   ins_pipe( fpu_reg_reg );
19409 %}
19410 
19411 // ====================ReplicateD=======================================
19412 
19413 // Replicate double (8 bytes) scalar to be vector
19414 instruct vReplD_reg(vec dst, vlRegD src) %{
19415   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19416   match(Set dst (Replicate src));
19417   format %{ "replicateD $dst,$src" %}
19418   ins_encode %{
19419     uint vlen = Matcher::vector_length(this);
19420     int vlen_enc = vector_length_encoding(this);
19421     if (vlen <= 2) {
19422       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19423     } else if (VM_Version::supports_avx2()) {
19424       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19425     } else {
19426       assert(vlen == 4, "sanity");
19427       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19428       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19429     }
19430   %}
19431   ins_pipe( pipe_slow );
19432 %}
19433 
19434 instruct ReplD_reg(vec dst, vlRegD src) %{
19435   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19436   match(Set dst (Replicate src));
19437   format %{ "replicateD $dst,$src" %}
19438   ins_encode %{
19439     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19440   %}
19441   ins_pipe( pipe_slow );
19442 %}
19443 
19444 instruct ReplD_mem(vec dst, memory mem) %{
19445   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19446   match(Set dst (Replicate (LoadD mem)));
19447   format %{ "replicateD $dst,$mem" %}
19448   ins_encode %{
19449     if (Matcher::vector_length(this) >= 4) {
19450       int vlen_enc = vector_length_encoding(this);
19451       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19452     } else {
19453       __ movddup($dst$$XMMRegister, $mem$$Address);
19454     }
19455   %}
19456   ins_pipe( pipe_slow );
19457 %}
19458 
19459 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19460 instruct ReplD_imm(vec dst, immD con) %{
19461   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19462   match(Set dst (Replicate con));
19463   format %{ "replicateD $dst,$con" %}
19464   ins_encode %{
19465     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19466     int vlen = Matcher::vector_length_in_bytes(this);
19467     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19468   %}
19469   ins_pipe( pipe_slow );
19470 %}
19471 
19472 instruct ReplD_zero(vec dst, immD0 zero) %{
19473   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19474   match(Set dst (Replicate zero));
19475   format %{ "replicateD $dst,$zero" %}
19476   ins_encode %{
19477     int vlen_enc = vector_length_encoding(this);
19478     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19479       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19480     } else {
19481       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19482     }
19483   %}
19484   ins_pipe( fpu_reg_reg );
19485 %}
19486 
19487 // ====================VECTOR INSERT=======================================
19488 
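// Scalar insert into a vector of at most 128 bits uses pinsrb/w/d/q or
// insertps directly on $dst.  For 256/512-bit vectors the 128-bit lane
// containing the element is extracted into a temporary, the scalar is
// inserted there, and the lane is written back with vinserti128/vinserti32x4.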
19489 instruct insert(vec dst, rRegI val, immU8 idx) %{
19490   predicate(Matcher::vector_length_in_bytes(n) < 32);
19491   match(Set dst (VectorInsert (Binary dst val) idx));
19492   format %{ "vector_insert $dst,$val,$idx" %}
19493   ins_encode %{
19494     assert(UseSSE >= 4, "required");
19495     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19496 
19497     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19498 
19499     assert(is_integral_type(elem_bt), "");
19500     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19501 
19502     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19503   %}
19504   ins_pipe( pipe_slow );
19505 %}
19506 
19507 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19508   predicate(Matcher::vector_length_in_bytes(n) == 32);
19509   match(Set dst (VectorInsert (Binary src val) idx));
19510   effect(TEMP vtmp);
19511   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19512   ins_encode %{
19513     int vlen_enc = Assembler::AVX_256bit;
19514     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19515     int elem_per_lane = 16/type2aelembytes(elem_bt);
19516     int log2epr = log2(elem_per_lane);
19517 
19518     assert(is_integral_type(elem_bt), "sanity");
19519     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19520 
19521     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19522     uint y_idx = ($idx$$constant >> log2epr) & 1;
19523     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19524     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19525     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19526   %}
19527   ins_pipe( pipe_slow );
19528 %}
19529 
19530 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19531   predicate(Matcher::vector_length_in_bytes(n) == 64);
19532   match(Set dst (VectorInsert (Binary src val) idx));
19533   effect(TEMP vtmp);
19534   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19535   ins_encode %{
19536     assert(UseAVX > 2, "sanity");
19537 
19538     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19539     int elem_per_lane = 16/type2aelembytes(elem_bt);
19540     int log2epr = log2(elem_per_lane);
19541 
19542     assert(is_integral_type(elem_bt), "");
19543     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19544 
19545     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19546     uint y_idx = ($idx$$constant >> log2epr) & 3;
19547     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19548     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19549     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19550   %}
19551   ins_pipe( pipe_slow );
19552 %}
19553 
19554 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19555   predicate(Matcher::vector_length(n) == 2);
19556   match(Set dst (VectorInsert (Binary dst val) idx));
19557   format %{ "vector_insert $dst,$val,$idx" %}
19558   ins_encode %{
19559     assert(UseSSE >= 4, "required");
19560     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19561     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19562 
19563     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19564   %}
19565   ins_pipe( pipe_slow );
19566 %}
19567 
19568 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19569   predicate(Matcher::vector_length(n) == 4);
19570   match(Set dst (VectorInsert (Binary src val) idx));
19571   effect(TEMP vtmp);
19572   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19573   ins_encode %{
19574     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19575     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19576 
19577     uint x_idx = $idx$$constant & right_n_bits(1);
19578     uint y_idx = ($idx$$constant >> 1) & 1;
19579     int vlen_enc = Assembler::AVX_256bit;
19580     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19581     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19582     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19583   %}
19584   ins_pipe( pipe_slow );
19585 %}
19586 
19587 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19588   predicate(Matcher::vector_length(n) == 8);
19589   match(Set dst (VectorInsert (Binary src val) idx));
19590   effect(TEMP vtmp);
19591   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19592   ins_encode %{
19593     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19594     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19595 
19596     uint x_idx = $idx$$constant & right_n_bits(1);
19597     uint y_idx = ($idx$$constant >> 1) & 3;
19598     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19599     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19600     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19601   %}
19602   ins_pipe( pipe_slow );
19603 %}
19604 
19605 instruct insertF(vec dst, regF val, immU8 idx) %{
19606   predicate(Matcher::vector_length(n) < 8);
19607   match(Set dst (VectorInsert (Binary dst val) idx));
19608   format %{ "vector_insert $dst,$val,$idx" %}
19609   ins_encode %{
19610     assert(UseSSE >= 4, "sanity");
19611 
19612     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19613     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19614 
19615     uint x_idx = $idx$$constant & right_n_bits(2);
19616     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19617   %}
19618   ins_pipe( pipe_slow );
19619 %}
19620 
19621 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19622   predicate(Matcher::vector_length(n) >= 8);
19623   match(Set dst (VectorInsert (Binary src val) idx));
19624   effect(TEMP vtmp);
19625   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19626   ins_encode %{
19627     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19628     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19629 
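    // x_idx addresses a float within a 128-bit lane, y_idx the lane itself; the lane is extracted,
    // patched with vinsertps, and written back over a copy of $src.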
19630     int vlen = Matcher::vector_length(this);
19631     uint x_idx = $idx$$constant & right_n_bits(2);
19632     if (vlen == 8) {
19633       uint y_idx = ($idx$$constant >> 2) & 1;
19634       int vlen_enc = Assembler::AVX_256bit;
19635       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19636       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19637       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19638     } else {
19639       assert(vlen == 16, "sanity");
19640       uint y_idx = ($idx$$constant >> 2) & 3;
19641       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19642       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19643       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19644     }
19645   %}
19646   ins_pipe( pipe_slow );
19647 %}
19648 
19649 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19650   predicate(Matcher::vector_length(n) == 2);
19651   match(Set dst (VectorInsert (Binary dst val) idx));
19652   effect(TEMP tmp);
19653   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19654   ins_encode %{
19655     assert(UseSSE >= 4, "sanity");
19656     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19657     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19658 
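    // pinsrq only accepts a GPR or memory source, so the double's bit pattern goes through a GPR first.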
19659     __ movq($tmp$$Register, $val$$XMMRegister);
19660     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19661   %}
19662   ins_pipe( pipe_slow );
19663 %}
19664 
19665 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19666   predicate(Matcher::vector_length(n) == 4);
19667   match(Set dst (VectorInsert (Binary src val) idx));
19668   effect(TEMP vtmp, TEMP tmp);
19669   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19670   ins_encode %{
19671     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19672     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19673 
19674     uint x_idx = $idx$$constant & right_n_bits(1);
19675     uint y_idx = ($idx$$constant >> 1) & 1;
19676     int vlen_enc = Assembler::AVX_256bit;
19677     __ movq($tmp$$Register, $val$$XMMRegister);
19678     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19679     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19680     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19681   %}
19682   ins_pipe( pipe_slow );
19683 %}
19684 
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19686   predicate(Matcher::vector_length(n) == 8);
19687   match(Set dst (VectorInsert (Binary src val) idx));
19688   effect(TEMP tmp, TEMP vtmp);
19689   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19690   ins_encode %{
19691     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19692     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19693 
19694     uint x_idx = $idx$$constant & right_n_bits(1);
19695     uint y_idx = ($idx$$constant >> 1) & 3;
19696     __ movq($tmp$$Register, $val$$XMMRegister);
19697     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19698     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19699     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19700   %}
19701   ins_pipe( pipe_slow );
19702 %}
19703 
19704 // ====================REDUCTION ARITHMETIC=======================================
19705 
19706 // =======================Int Reduction==========================================
19707 
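// A single rule covers all integer reduction flavours; MacroAssembler::reduceI dispatches on the
// ideal opcode and vector length.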
19708 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19709   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19710   match(Set dst (AddReductionVI src1 src2));
19711   match(Set dst (MulReductionVI src1 src2));
19712   match(Set dst (AndReductionV  src1 src2));
19713   match(Set dst ( OrReductionV  src1 src2));
19714   match(Set dst (XorReductionV  src1 src2));
19715   match(Set dst (MinReductionV  src1 src2));
19716   match(Set dst (MaxReductionV  src1 src2));
19717   match(Set dst (UMinReductionV  src1 src2));
19718   match(Set dst (UMaxReductionV  src1 src2));
19719   effect(TEMP vtmp1, TEMP vtmp2);
19720   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19721   ins_encode %{
19722     int opcode = this->ideal_Opcode();
19723     int vlen = Matcher::vector_length(this, $src2);
19724     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19725   %}
19726   ins_pipe( pipe_slow );
19727 %}
19728 
19729 // =======================Long Reduction==========================================
19730 
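// Two variants, split on AVX512DQ: the generic one stays in legacy-encodable (legVec) registers,
// while the AVX512DQ form (which adds 64-bit ops such as vpmullq) may use any vector register.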
19731 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19732   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19733   match(Set dst (AddReductionVL src1 src2));
19734   match(Set dst (MulReductionVL src1 src2));
19735   match(Set dst (AndReductionV  src1 src2));
19736   match(Set dst ( OrReductionV  src1 src2));
19737   match(Set dst (XorReductionV  src1 src2));
19738   match(Set dst (MinReductionV  src1 src2));
19739   match(Set dst (MaxReductionV  src1 src2));
19740   match(Set dst (UMinReductionV  src1 src2));
19741   match(Set dst (UMaxReductionV  src1 src2));
19742   effect(TEMP vtmp1, TEMP vtmp2);
19743   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19744   ins_encode %{
19745     int opcode = this->ideal_Opcode();
19746     int vlen = Matcher::vector_length(this, $src2);
19747     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19748   %}
19749   ins_pipe( pipe_slow );
19750 %}
19751 
19752 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19753   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19754   match(Set dst (AddReductionVL src1 src2));
19755   match(Set dst (MulReductionVL src1 src2));
19756   match(Set dst (AndReductionV  src1 src2));
19757   match(Set dst ( OrReductionV  src1 src2));
19758   match(Set dst (XorReductionV  src1 src2));
19759   match(Set dst (MinReductionV  src1 src2));
19760   match(Set dst (MaxReductionV  src1 src2));
19761   match(Set dst (UMinReductionV  src1 src2));
19762   match(Set dst (UMaxReductionV  src1 src2));
19763   effect(TEMP vtmp1, TEMP vtmp2);
19764   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19765   ins_encode %{
19766     int opcode = this->ideal_Opcode();
19767     int vlen = Matcher::vector_length(this, $src2);
19768     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19769   %}
19770   ins_pipe( pipe_slow );
19771 %}
19772 
19773 // =======================Float Reduction==========================================
19774 
19775 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19776   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19777   match(Set dst (AddReductionVF dst src));
19778   match(Set dst (MulReductionVF dst src));
19779   effect(TEMP dst, TEMP vtmp);
19780   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19781   ins_encode %{
19782     int opcode = this->ideal_Opcode();
19783     int vlen = Matcher::vector_length(this, $src);
19784     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19785   %}
19786   ins_pipe( pipe_slow );
19787 %}
19788 
19789 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19790   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19791   match(Set dst (AddReductionVF dst src));
19792   match(Set dst (MulReductionVF dst src));
19793   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19794   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19795   ins_encode %{
19796     int opcode = this->ideal_Opcode();
19797     int vlen = Matcher::vector_length(this, $src);
19798     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19799   %}
19800   ins_pipe( pipe_slow );
19801 %}
19802 
19803 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19804   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19805   match(Set dst (AddReductionVF dst src));
19806   match(Set dst (MulReductionVF dst src));
19807   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19808   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19809   ins_encode %{
19810     int opcode = this->ideal_Opcode();
19811     int vlen = Matcher::vector_length(this, $src);
19812     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19813   %}
19814   ins_pipe( pipe_slow );
19815 %}
19816 
19817 
19818 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19819   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19820   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19821   // src1 contains reduction identity
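  // No association order is mandated, so the expansion is free to combine lanes pairwise
  // (tree-style) instead of strictly left to right.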
19822   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19823   match(Set dst (AddReductionVF src1 src2));
19824   match(Set dst (MulReductionVF src1 src2));
19825   effect(TEMP dst);
19826   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19827   ins_encode %{
19828     int opcode = this->ideal_Opcode();
19829     int vlen = Matcher::vector_length(this, $src2);
19830     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19831   %}
19832   ins_pipe( pipe_slow );
19833 %}
19834 
19835 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19836   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19837   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19838   // src1 contains reduction identity
19839   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19840   match(Set dst (AddReductionVF src1 src2));
19841   match(Set dst (MulReductionVF src1 src2));
19842   effect(TEMP dst, TEMP vtmp);
19843   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19844   ins_encode %{
19845     int opcode = this->ideal_Opcode();
19846     int vlen = Matcher::vector_length(this, $src2);
19847     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19848   %}
19849   ins_pipe( pipe_slow );
19850 %}
19851 
19852 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19853   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19854   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19855   // src1 contains reduction identity
19856   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19857   match(Set dst (AddReductionVF src1 src2));
19858   match(Set dst (MulReductionVF src1 src2));
19859   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19860   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19861   ins_encode %{
19862     int opcode = this->ideal_Opcode();
19863     int vlen = Matcher::vector_length(this, $src2);
19864     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19865   %}
19866   ins_pipe( pipe_slow );
19867 %}
19868 
19869 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19870   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19871   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19872   // src1 contains reduction identity
19873   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19874   match(Set dst (AddReductionVF src1 src2));
19875   match(Set dst (MulReductionVF src1 src2));
19876   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19877   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19878   ins_encode %{
19879     int opcode = this->ideal_Opcode();
19880     int vlen = Matcher::vector_length(this, $src2);
19881     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19882   %}
19883   ins_pipe( pipe_slow );
19884 %}
19885 
19886 // =======================Double Reduction==========================================
19887 
19888 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19889   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19890   match(Set dst (AddReductionVD dst src));
19891   match(Set dst (MulReductionVD dst src));
19892   effect(TEMP dst, TEMP vtmp);
19893   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19894   ins_encode %{
19895     int opcode = this->ideal_Opcode();
19896     int vlen = Matcher::vector_length(this, $src);
19897     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19898 %}
19899   ins_pipe( pipe_slow );
19900 %}
19901 
19902 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19903   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19904   match(Set dst (AddReductionVD dst src));
19905   match(Set dst (MulReductionVD dst src));
19906   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19907   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19908   ins_encode %{
19909     int opcode = this->ideal_Opcode();
19910     int vlen = Matcher::vector_length(this, $src);
19911     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19912   %}
19913   ins_pipe( pipe_slow );
19914 %}
19915 
19916 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19917   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19918   match(Set dst (AddReductionVD dst src));
19919   match(Set dst (MulReductionVD dst src));
19920   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19921   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19922   ins_encode %{
19923     int opcode = this->ideal_Opcode();
19924     int vlen = Matcher::vector_length(this, $src);
19925     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19926   %}
19927   ins_pipe( pipe_slow );
19928 %}
19929 
19930 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19931   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19932   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19933   // src1 contains reduction identity
19934   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19935   match(Set dst (AddReductionVD src1 src2));
19936   match(Set dst (MulReductionVD src1 src2));
19937   effect(TEMP dst);
19938   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19939   ins_encode %{
19940     int opcode = this->ideal_Opcode();
19941     int vlen = Matcher::vector_length(this, $src2);
19942     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19943 %}
19944   ins_pipe( pipe_slow );
19945 %}
19946 
19947 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19948   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19949   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19950   // src1 contains reduction identity
19951   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19952   match(Set dst (AddReductionVD src1 src2));
19953   match(Set dst (MulReductionVD src1 src2));
19954   effect(TEMP dst, TEMP vtmp);
19955   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19956   ins_encode %{
19957     int opcode = this->ideal_Opcode();
19958     int vlen = Matcher::vector_length(this, $src2);
19959     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19960   %}
19961   ins_pipe( pipe_slow );
19962 %}
19963 
19964 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19965   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19966   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19967   // src1 contains reduction identity
19968   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19969   match(Set dst (AddReductionVD src1 src2));
19970   match(Set dst (MulReductionVD src1 src2));
19971   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19972   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19973   ins_encode %{
19974     int opcode = this->ideal_Opcode();
19975     int vlen = Matcher::vector_length(this, $src2);
19976     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19977   %}
19978   ins_pipe( pipe_slow );
19979 %}
19980 
19981 // =======================Byte Reduction==========================================
19982 
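// Split on AVX512BW: the non-BW rule keeps to legacy-encodable (legVec) registers, while the
// BW form, which provides the 512-bit byte/word operations, can use any vector register.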
19983 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19984   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19985   match(Set dst (AddReductionVI src1 src2));
19986   match(Set dst (AndReductionV  src1 src2));
19987   match(Set dst ( OrReductionV  src1 src2));
19988   match(Set dst (XorReductionV  src1 src2));
19989   match(Set dst (MinReductionV  src1 src2));
19990   match(Set dst (MaxReductionV  src1 src2));
19991   match(Set dst (UMinReductionV  src1 src2));
19992   match(Set dst (UMaxReductionV  src1 src2));
19993   effect(TEMP vtmp1, TEMP vtmp2);
19994   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19995   ins_encode %{
19996     int opcode = this->ideal_Opcode();
19997     int vlen = Matcher::vector_length(this, $src2);
19998     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19999   %}
20000   ins_pipe( pipe_slow );
20001 %}
20002 
20003 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20004   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
20005   match(Set dst (AddReductionVI src1 src2));
20006   match(Set dst (AndReductionV  src1 src2));
20007   match(Set dst ( OrReductionV  src1 src2));
20008   match(Set dst (XorReductionV  src1 src2));
20009   match(Set dst (MinReductionV  src1 src2));
20010   match(Set dst (MaxReductionV  src1 src2));
20011   match(Set dst (UMinReductionV  src1 src2));
20012   match(Set dst (UMaxReductionV  src1 src2));
20013   effect(TEMP vtmp1, TEMP vtmp2);
20014   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20015   ins_encode %{
20016     int opcode = this->ideal_Opcode();
20017     int vlen = Matcher::vector_length(this, $src2);
20018     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20019   %}
20020   ins_pipe( pipe_slow );
20021 %}
20022 
20023 // =======================Short Reduction==========================================
20024 
20025 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20026   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
20027   match(Set dst (AddReductionVI src1 src2));
20028   match(Set dst (MulReductionVI src1 src2));
20029   match(Set dst (AndReductionV  src1 src2));
20030   match(Set dst ( OrReductionV  src1 src2));
20031   match(Set dst (XorReductionV  src1 src2));
20032   match(Set dst (MinReductionV  src1 src2));
20033   match(Set dst (MaxReductionV  src1 src2));
20034   match(Set dst (UMinReductionV  src1 src2));
20035   match(Set dst (UMaxReductionV  src1 src2));
20036   effect(TEMP vtmp1, TEMP vtmp2);
20037   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20038   ins_encode %{
20039     int opcode = this->ideal_Opcode();
20040     int vlen = Matcher::vector_length(this, $src2);
20041     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20042   %}
20043   ins_pipe( pipe_slow );
20044 %}
20045 
20046 // =======================Mul Reduction==========================================
20047 
20048 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20049   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20050             Matcher::vector_length(n->in(2)) <= 32); // src2
20051   match(Set dst (MulReductionVI src1 src2));
20052   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20053   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20054   ins_encode %{
20055     int opcode = this->ideal_Opcode();
20056     int vlen = Matcher::vector_length(this, $src2);
20057     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20058   %}
20059   ins_pipe( pipe_slow );
20060 %}
20061 
20062 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20063   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20064             Matcher::vector_length(n->in(2)) == 64); // src2
20065   match(Set dst (MulReductionVI src1 src2));
20066   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20067   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20068   ins_encode %{
20069     int opcode = this->ideal_Opcode();
20070     int vlen = Matcher::vector_length(this, $src2);
20071     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20072   %}
20073   ins_pipe( pipe_slow );
20074 %}
20075 
20076 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
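// x86 min/max instructions do not directly match Java's NaN and signed-zero rules, so the
// expansion goes through MacroAssembler::reduceFloatMinMax, which needs several vector
// temporaries and clobbers the condition flags.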
20078 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20079                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20080   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20081             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20082              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20083             Matcher::vector_length(n->in(2)) == 2);
20084   match(Set dst (MinReductionV src1 src2));
20085   match(Set dst (MaxReductionV src1 src2));
20086   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20087   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20088   ins_encode %{
20089     assert(UseAVX > 0, "sanity");
20090 
20091     int opcode = this->ideal_Opcode();
20092     int vlen = Matcher::vector_length(this, $src2);
20093     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20094                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20095   %}
20096   ins_pipe( pipe_slow );
20097 %}
20098 
20099 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20100                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20101   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20102             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20103              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20104             Matcher::vector_length(n->in(2)) >= 4);
20105   match(Set dst (MinReductionV src1 src2));
20106   match(Set dst (MaxReductionV src1 src2));
20107   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20108   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20109   ins_encode %{
20110     assert(UseAVX > 0, "sanity");
20111 
20112     int opcode = this->ideal_Opcode();
20113     int vlen = Matcher::vector_length(this, $src2);
20114     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20115                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20116   %}
20117   ins_pipe( pipe_slow );
20118 %}
20119 
20120 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
20121                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20122   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20123             Matcher::vector_length(n->in(2)) == 2);
20124   match(Set dst (MinReductionV dst src));
20125   match(Set dst (MaxReductionV dst src));
20126   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20127   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20128   ins_encode %{
20129     assert(UseAVX > 0, "sanity");
20130 
20131     int opcode = this->ideal_Opcode();
20132     int vlen = Matcher::vector_length(this, $src);
20133     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20134                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20135   %}
20136   ins_pipe( pipe_slow );
20137 %}
20138 
20139 
20140 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
20141                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20142   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20143             Matcher::vector_length(n->in(2)) >= 4);
20144   match(Set dst (MinReductionV dst src));
20145   match(Set dst (MaxReductionV dst src));
20146   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20147   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20148   ins_encode %{
20149     assert(UseAVX > 0, "sanity");
20150 
20151     int opcode = this->ideal_Opcode();
20152     int vlen = Matcher::vector_length(this, $src);
20153     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20154                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20155   %}
20156   ins_pipe( pipe_slow );
20157 %}
20158 
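// AVX10.2 provides min/max forms with the required NaN handling, so these variants need only
// small scratch registers and leave the condition flags untouched.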
20159 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
20160   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20161             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20162              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20163             Matcher::vector_length(n->in(2)) == 2);
20164   match(Set dst (MinReductionV src1 src2));
20165   match(Set dst (MaxReductionV src1 src2));
20166   effect(TEMP dst, TEMP xtmp1);
20167   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
20168   ins_encode %{
20169     int opcode = this->ideal_Opcode();
20170     int vlen = Matcher::vector_length(this, $src2);
20171     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20172                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20173   %}
20174   ins_pipe( pipe_slow );
20175 %}
20176 
20177 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
20178   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20179             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20180              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20181             Matcher::vector_length(n->in(2)) >= 4);
20182   match(Set dst (MinReductionV src1 src2));
20183   match(Set dst (MaxReductionV src1 src2));
20184   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20185   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20186   ins_encode %{
20187     int opcode = this->ideal_Opcode();
20188     int vlen = Matcher::vector_length(this, $src2);
20189     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20190                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20191   %}
20192   ins_pipe( pipe_slow );
20193 %}
20194 
20195 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20196   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20197             Matcher::vector_length(n->in(2)) == 2);
20198   match(Set dst (MinReductionV dst src));
20199   match(Set dst (MaxReductionV dst src));
20200   effect(TEMP dst, TEMP xtmp1);
20201   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20202   ins_encode %{
20203     int opcode = this->ideal_Opcode();
20204     int vlen = Matcher::vector_length(this, $src);
20205     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20206                          $xtmp1$$XMMRegister);
20207   %}
20208   ins_pipe( pipe_slow );
20209 %}
20210 
20211 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20212   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20213             Matcher::vector_length(n->in(2)) >= 4);
20214   match(Set dst (MinReductionV dst src));
20215   match(Set dst (MaxReductionV dst src));
20216   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20217   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20218   ins_encode %{
20219     int opcode = this->ideal_Opcode();
20220     int vlen = Matcher::vector_length(this, $src);
20221     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20222                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20223   %}
20224   ins_pipe( pipe_slow );
20225 %}
20226 
//--------------------Min/Max Double Reduction --------------------
20228 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20229                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20230   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20231             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20232              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20233             Matcher::vector_length(n->in(2)) == 2);
20234   match(Set dst (MinReductionV src1 src2));
20235   match(Set dst (MaxReductionV src1 src2));
20236   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20237   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20238   ins_encode %{
20239     assert(UseAVX > 0, "sanity");
20240 
20241     int opcode = this->ideal_Opcode();
20242     int vlen = Matcher::vector_length(this, $src2);
20243     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20244                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20245   %}
20246   ins_pipe( pipe_slow );
20247 %}
20248 
20249 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20250                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20251   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20252             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20253              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20254             Matcher::vector_length(n->in(2)) >= 4);
20255   match(Set dst (MinReductionV src1 src2));
20256   match(Set dst (MaxReductionV src1 src2));
20257   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20258   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20259   ins_encode %{
20260     assert(UseAVX > 0, "sanity");
20261 
20262     int opcode = this->ideal_Opcode();
20263     int vlen = Matcher::vector_length(this, $src2);
20264     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20265                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20266   %}
20267   ins_pipe( pipe_slow );
20268 %}
20269 
20270 
20271 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20272                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20273   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20274             Matcher::vector_length(n->in(2)) == 2);
20275   match(Set dst (MinReductionV dst src));
20276   match(Set dst (MaxReductionV dst src));
20277   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20278   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20279   ins_encode %{
20280     assert(UseAVX > 0, "sanity");
20281 
20282     int opcode = this->ideal_Opcode();
20283     int vlen = Matcher::vector_length(this, $src);
20284     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20285                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20286   %}
20287   ins_pipe( pipe_slow );
20288 %}
20289 
20290 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20291                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20292   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20293             Matcher::vector_length(n->in(2)) >= 4);
20294   match(Set dst (MinReductionV dst src));
20295   match(Set dst (MaxReductionV dst src));
20296   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20297   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20298   ins_encode %{
20299     assert(UseAVX > 0, "sanity");
20300 
20301     int opcode = this->ideal_Opcode();
20302     int vlen = Matcher::vector_length(this, $src);
20303     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20304                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20305   %}
20306   ins_pipe( pipe_slow );
20307 %}
20308 
20309 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20310   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20311             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20312              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20313             Matcher::vector_length(n->in(2)) == 2);
20314   match(Set dst (MinReductionV src1 src2));
20315   match(Set dst (MaxReductionV src1 src2));
20316   effect(TEMP dst, TEMP xtmp1);
20317   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20318   ins_encode %{
20319     int opcode = this->ideal_Opcode();
20320     int vlen = Matcher::vector_length(this, $src2);
20321     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20322                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20323   %}
20324   ins_pipe( pipe_slow );
20325 %}
20326 
20327 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20328   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20329             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20330              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20331             Matcher::vector_length(n->in(2)) >= 4);
20332   match(Set dst (MinReductionV src1 src2));
20333   match(Set dst (MaxReductionV src1 src2));
20334   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20335   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20336   ins_encode %{
20337     int opcode = this->ideal_Opcode();
20338     int vlen = Matcher::vector_length(this, $src2);
20339     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20340                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20341   %}
20342   ins_pipe( pipe_slow );
20343 %}
20344 
20345 
20346 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20347   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20348             Matcher::vector_length(n->in(2)) == 2);
20349   match(Set dst (MinReductionV dst src));
20350   match(Set dst (MaxReductionV dst src));
20351   effect(TEMP dst, TEMP xtmp1);
20352   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20353   ins_encode %{
20354     int opcode = this->ideal_Opcode();
20355     int vlen = Matcher::vector_length(this, $src);
20356     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20357                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20358   %}
20359   ins_pipe( pipe_slow );
20360 %}
20361 
20362 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20363   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20364             Matcher::vector_length(n->in(2)) >= 4);
20365   match(Set dst (MinReductionV dst src));
20366   match(Set dst (MaxReductionV dst src));
20367   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20368   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20369   ins_encode %{
20370     int opcode = this->ideal_Opcode();
20371     int vlen = Matcher::vector_length(this, $src);
20372     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20373                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20374   %}
20375   ins_pipe( pipe_slow );
20376 %}
20377 
20378 // ====================VECTOR ARITHMETIC=======================================
20379 
20380 // --------------------------------- ADD --------------------------------------
20381 
20382 // Bytes vector add
20383 instruct vaddB(vec dst, vec src) %{
20384   predicate(UseAVX == 0);
20385   match(Set dst (AddVB dst src));
20386   format %{ "paddb   $dst,$src\t! add packedB" %}
20387   ins_encode %{
20388     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20389   %}
20390   ins_pipe( pipe_slow );
20391 %}
20392 
20393 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20394   predicate(UseAVX > 0);
20395   match(Set dst (AddVB src1 src2));
20396   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20397   ins_encode %{
20398     int vlen_enc = vector_length_encoding(this);
20399     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20400   %}
20401   ins_pipe( pipe_slow );
20402 %}
20403 
20404 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20405   predicate((UseAVX > 0) &&
20406             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20407   match(Set dst (AddVB src (LoadVector mem)));
20408   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20409   ins_encode %{
20410     int vlen_enc = vector_length_encoding(this);
20411     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20412   %}
20413   ins_pipe( pipe_slow );
20414 %}
20415 
20416 // Shorts/Chars vector add
20417 instruct vaddS(vec dst, vec src) %{
20418   predicate(UseAVX == 0);
20419   match(Set dst (AddVS dst src));
20420   format %{ "paddw   $dst,$src\t! add packedS" %}
20421   ins_encode %{
20422     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20423   %}
20424   ins_pipe( pipe_slow );
20425 %}
20426 
20427 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20428   predicate(UseAVX > 0);
20429   match(Set dst (AddVS src1 src2));
20430   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20431   ins_encode %{
20432     int vlen_enc = vector_length_encoding(this);
20433     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20434   %}
20435   ins_pipe( pipe_slow );
20436 %}
20437 
20438 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20439   predicate((UseAVX > 0) &&
20440             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20441   match(Set dst (AddVS src (LoadVector mem)));
20442   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20443   ins_encode %{
20444     int vlen_enc = vector_length_encoding(this);
20445     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20446   %}
20447   ins_pipe( pipe_slow );
20448 %}
20449 
20450 // Integers vector add
20451 instruct vaddI(vec dst, vec src) %{
20452   predicate(UseAVX == 0);
20453   match(Set dst (AddVI dst src));
20454   format %{ "paddd   $dst,$src\t! add packedI" %}
20455   ins_encode %{
20456     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20457   %}
20458   ins_pipe( pipe_slow );
20459 %}
20460 
20461 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20462   predicate(UseAVX > 0);
20463   match(Set dst (AddVI src1 src2));
20464   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20465   ins_encode %{
20466     int vlen_enc = vector_length_encoding(this);
20467     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20468   %}
20469   ins_pipe( pipe_slow );
20470 %}
20471 
20472 
20473 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20474   predicate((UseAVX > 0) &&
20475             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20476   match(Set dst (AddVI src (LoadVector mem)));
20477   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20478   ins_encode %{
20479     int vlen_enc = vector_length_encoding(this);
20480     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20481   %}
20482   ins_pipe( pipe_slow );
20483 %}
20484 
20485 // Longs vector add
20486 instruct vaddL(vec dst, vec src) %{
20487   predicate(UseAVX == 0);
20488   match(Set dst (AddVL dst src));
20489   format %{ "paddq   $dst,$src\t! add packedL" %}
20490   ins_encode %{
20491     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20492   %}
20493   ins_pipe( pipe_slow );
20494 %}
20495 
20496 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20497   predicate(UseAVX > 0);
20498   match(Set dst (AddVL src1 src2));
20499   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20500   ins_encode %{
20501     int vlen_enc = vector_length_encoding(this);
20502     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20503   %}
20504   ins_pipe( pipe_slow );
20505 %}
20506 
20507 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20508   predicate((UseAVX > 0) &&
20509             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20510   match(Set dst (AddVL src (LoadVector mem)));
20511   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20512   ins_encode %{
20513     int vlen_enc = vector_length_encoding(this);
20514     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20515   %}
20516   ins_pipe( pipe_slow );
20517 %}
20518 
20519 // Floats vector add
20520 instruct vaddF(vec dst, vec src) %{
20521   predicate(UseAVX == 0);
20522   match(Set dst (AddVF dst src));
20523   format %{ "addps   $dst,$src\t! add packedF" %}
20524   ins_encode %{
20525     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20526   %}
20527   ins_pipe( pipe_slow );
20528 %}
20529 
20530 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20531   predicate(UseAVX > 0);
20532   match(Set dst (AddVF src1 src2));
20533   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20534   ins_encode %{
20535     int vlen_enc = vector_length_encoding(this);
20536     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20537   %}
20538   ins_pipe( pipe_slow );
20539 %}
20540 
20541 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20542   predicate((UseAVX > 0) &&
20543             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20544   match(Set dst (AddVF src (LoadVector mem)));
20545   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20546   ins_encode %{
20547     int vlen_enc = vector_length_encoding(this);
20548     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20549   %}
20550   ins_pipe( pipe_slow );
20551 %}
20552 
20553 // Doubles vector add
20554 instruct vaddD(vec dst, vec src) %{
20555   predicate(UseAVX == 0);
20556   match(Set dst (AddVD dst src));
20557   format %{ "addpd   $dst,$src\t! add packedD" %}
20558   ins_encode %{
20559     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20560   %}
20561   ins_pipe( pipe_slow );
20562 %}
20563 
20564 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20565   predicate(UseAVX > 0);
20566   match(Set dst (AddVD src1 src2));
20567   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20568   ins_encode %{
20569     int vlen_enc = vector_length_encoding(this);
20570     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20571   %}
20572   ins_pipe( pipe_slow );
20573 %}
20574 
20575 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20576   predicate((UseAVX > 0) &&
20577             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20578   match(Set dst (AddVD src (LoadVector mem)));
20579   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20580   ins_encode %{
20581     int vlen_enc = vector_length_encoding(this);
20582     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20583   %}
20584   ins_pipe( pipe_slow );
20585 %}
20586 
20587 // --------------------------------- SUB --------------------------------------
20588 
20589 // Bytes vector sub
20590 instruct vsubB(vec dst, vec src) %{
20591   predicate(UseAVX == 0);
20592   match(Set dst (SubVB dst src));
20593   format %{ "psubb   $dst,$src\t! sub packedB" %}
20594   ins_encode %{
20595     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20596   %}
20597   ins_pipe( pipe_slow );
20598 %}
20599 
20600 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20601   predicate(UseAVX > 0);
20602   match(Set dst (SubVB src1 src2));
20603   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20604   ins_encode %{
20605     int vlen_enc = vector_length_encoding(this);
20606     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20607   %}
20608   ins_pipe( pipe_slow );
20609 %}
20610 
20611 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20612   predicate((UseAVX > 0) &&
20613             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20614   match(Set dst (SubVB src (LoadVector mem)));
20615   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20616   ins_encode %{
20617     int vlen_enc = vector_length_encoding(this);
20618     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20619   %}
20620   ins_pipe( pipe_slow );
20621 %}
20622 
20623 // Shorts/Chars vector sub
20624 instruct vsubS(vec dst, vec src) %{
20625   predicate(UseAVX == 0);
20626   match(Set dst (SubVS dst src));
20627   format %{ "psubw   $dst,$src\t! sub packedS" %}
20628   ins_encode %{
20629     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20630   %}
20631   ins_pipe( pipe_slow );
20632 %}
20633 
20634 
20635 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20636   predicate(UseAVX > 0);
20637   match(Set dst (SubVS src1 src2));
20638   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20639   ins_encode %{
20640     int vlen_enc = vector_length_encoding(this);
20641     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20642   %}
20643   ins_pipe( pipe_slow );
20644 %}
20645 
20646 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20647   predicate((UseAVX > 0) &&
20648             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20649   match(Set dst (SubVS src (LoadVector mem)));
20650   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20651   ins_encode %{
20652     int vlen_enc = vector_length_encoding(this);
20653     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20654   %}
20655   ins_pipe( pipe_slow );
20656 %}
20657 
20658 // Integers vector sub
20659 instruct vsubI(vec dst, vec src) %{
20660   predicate(UseAVX == 0);
20661   match(Set dst (SubVI dst src));
20662   format %{ "psubd   $dst,$src\t! sub packedI" %}
20663   ins_encode %{
20664     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20665   %}
20666   ins_pipe( pipe_slow );
20667 %}
20668 
20669 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20670   predicate(UseAVX > 0);
20671   match(Set dst (SubVI src1 src2));
20672   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20673   ins_encode %{
20674     int vlen_enc = vector_length_encoding(this);
20675     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20676   %}
20677   ins_pipe( pipe_slow );
20678 %}
20679 
20680 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20681   predicate((UseAVX > 0) &&
20682             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20683   match(Set dst (SubVI src (LoadVector mem)));
20684   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20685   ins_encode %{
20686     int vlen_enc = vector_length_encoding(this);
20687     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20688   %}
20689   ins_pipe( pipe_slow );
20690 %}
20691 
20692 // Longs vector sub
20693 instruct vsubL(vec dst, vec src) %{
20694   predicate(UseAVX == 0);
20695   match(Set dst (SubVL dst src));
20696   format %{ "psubq   $dst,$src\t! sub packedL" %}
20697   ins_encode %{
20698     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20699   %}
20700   ins_pipe( pipe_slow );
20701 %}
20702 
20703 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20704   predicate(UseAVX > 0);
20705   match(Set dst (SubVL src1 src2));
20706   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20707   ins_encode %{
20708     int vlen_enc = vector_length_encoding(this);
20709     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20710   %}
20711   ins_pipe( pipe_slow );
20712 %}
20713 
20714 
20715 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20716   predicate((UseAVX > 0) &&
20717             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20718   match(Set dst (SubVL src (LoadVector mem)));
20719   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20720   ins_encode %{
20721     int vlen_enc = vector_length_encoding(this);
20722     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20723   %}
20724   ins_pipe( pipe_slow );
20725 %}
20726 
20727 // Floats vector sub
20728 instruct vsubF(vec dst, vec src) %{
20729   predicate(UseAVX == 0);
20730   match(Set dst (SubVF dst src));
20731   format %{ "subps   $dst,$src\t! sub packedF" %}
20732   ins_encode %{
20733     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20734   %}
20735   ins_pipe( pipe_slow );
20736 %}
20737 
20738 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20739   predicate(UseAVX > 0);
20740   match(Set dst (SubVF src1 src2));
20741   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20742   ins_encode %{
20743     int vlen_enc = vector_length_encoding(this);
20744     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20745   %}
20746   ins_pipe( pipe_slow );
20747 %}
20748 
20749 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20750   predicate((UseAVX > 0) &&
20751             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20752   match(Set dst (SubVF src (LoadVector mem)));
20753   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20754   ins_encode %{
20755     int vlen_enc = vector_length_encoding(this);
20756     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20757   %}
20758   ins_pipe( pipe_slow );
20759 %}
20760 
20761 // Doubles vector sub
20762 instruct vsubD(vec dst, vec src) %{
20763   predicate(UseAVX == 0);
20764   match(Set dst (SubVD dst src));
20765   format %{ "subpd   $dst,$src\t! sub packedD" %}
20766   ins_encode %{
20767     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20768   %}
20769   ins_pipe( pipe_slow );
20770 %}
20771 
20772 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20773   predicate(UseAVX > 0);
20774   match(Set dst (SubVD src1 src2));
20775   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20776   ins_encode %{
20777     int vlen_enc = vector_length_encoding(this);
20778     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20779   %}
20780   ins_pipe( pipe_slow );
20781 %}
20782 
20783 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20784   predicate((UseAVX > 0) &&
20785             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20786   match(Set dst (SubVD src (LoadVector mem)));
20787   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20788   ins_encode %{
20789     int vlen_enc = vector_length_encoding(this);
20790     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20791   %}
20792   ins_pipe( pipe_slow );
20793 %}
20794 
20795 // --------------------------------- MUL --------------------------------------
20796 
20797 // Byte vector mul
20798 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20799   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20800   match(Set dst (MulVB src1 src2));
20801   effect(TEMP dst, TEMP xtmp);
20802   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20803   ins_encode %{
20804     assert(UseSSE > 3, "required");
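    // There is no packed byte multiply: sign-extend the bytes to words, multiply, keep the low
    // byte of each 16-bit product, and pack the result back down to bytes.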
20805     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20806     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20807     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20808     __ psllw($dst$$XMMRegister, 8);
20809     __ psrlw($dst$$XMMRegister, 8);
20810     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20811   %}
20812   ins_pipe( pipe_slow );
20813 %}
20814 
20815 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20816   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20817   match(Set dst (MulVB src1 src2));
20818   effect(TEMP dst, TEMP xtmp);
20819   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20820   ins_encode %{
20821     assert(UseSSE > 3, "required");
20822     // Odd-index elements
20823     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20824     __ psrlw($dst$$XMMRegister, 8);
20825     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20826     __ psrlw($xtmp$$XMMRegister, 8);
20827     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20828     __ psllw($dst$$XMMRegister, 8);
20829     // Even-index elements
20830     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20831     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20832     __ psllw($xtmp$$XMMRegister, 8);
20833     __ psrlw($xtmp$$XMMRegister, 8);
20834     // Combine
20835     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20836   %}
20837   ins_pipe( pipe_slow );
20838 %}
20839 
20840 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20841   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20842   match(Set dst (MulVB src1 src2));
20843   effect(TEMP xtmp1, TEMP xtmp2);
20844   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20845   ins_encode %{
20846     int vlen_enc = vector_length_encoding(this);
20847     // Odd-index elements
20848     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20849     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20850     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20851     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20852     // Even-index elements
20853     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20854     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20855     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20856     // Combine
20857     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20858   %}
20859   ins_pipe( pipe_slow );
20860 %}
20861 
20862 // Shorts/Chars vector mul
20863 instruct vmulS(vec dst, vec src) %{
20864   predicate(UseAVX == 0);
20865   match(Set dst (MulVS dst src));
20866   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20867   ins_encode %{
20868     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20869   %}
20870   ins_pipe( pipe_slow );
20871 %}
20872 
20873 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20874   predicate(UseAVX > 0);
20875   match(Set dst (MulVS src1 src2));
20876   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20877   ins_encode %{
20878     int vlen_enc = vector_length_encoding(this);
20879     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20880   %}
20881   ins_pipe( pipe_slow );
20882 %}
20883 
20884 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20885   predicate((UseAVX > 0) &&
20886             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20887   match(Set dst (MulVS src (LoadVector mem)));
20888   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20889   ins_encode %{
20890     int vlen_enc = vector_length_encoding(this);
20891     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20892   %}
20893   ins_pipe( pipe_slow );
20894 %}
20895 
20896 // Integers vector mul
20897 instruct vmulI(vec dst, vec src) %{
20898   predicate(UseAVX == 0);
20899   match(Set dst (MulVI dst src));
20900   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20901   ins_encode %{
20902     assert(UseSSE > 3, "required");
20903     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20904   %}
20905   ins_pipe( pipe_slow );
20906 %}
20907 
20908 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20909   predicate(UseAVX > 0);
20910   match(Set dst (MulVI src1 src2));
20911   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20912   ins_encode %{
20913     int vlen_enc = vector_length_encoding(this);
20914     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20915   %}
20916   ins_pipe( pipe_slow );
20917 %}
20918 
20919 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20920   predicate((UseAVX > 0) &&
20921             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20922   match(Set dst (MulVI src (LoadVector mem)));
20923   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20924   ins_encode %{
20925     int vlen_enc = vector_length_encoding(this);
20926     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20927   %}
20928   ins_pipe( pipe_slow );
20929 %}
20930 
20931 // Longs vector mul
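// Packed 64x64->64-bit multiply (vpmullq) requires AVX512DQ. On other targets
// the general lowerings below decompose each lane into 32-bit halves.
// Illustrative sketch (per 64-bit lane, not generated code):
//   a * b  ==  a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)   (mod 2^64)
// The cross terms come from pmulld on a dword-swapped copy; the a_lo*b_lo term
// comes from the widening pmuludq.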
20932 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20933   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20934              VM_Version::supports_avx512dq()) ||
20935             VM_Version::supports_avx512vldq());
20936   match(Set dst (MulVL src1 src2));
20937   ins_cost(500);
20938   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20939   ins_encode %{
20940     assert(UseAVX > 2, "required");
20941     int vlen_enc = vector_length_encoding(this);
20942     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20943   %}
20944   ins_pipe( pipe_slow );
20945 %}
20946 
20947 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20948   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20949              VM_Version::supports_avx512dq()) ||
20950             (Matcher::vector_length_in_bytes(n) > 8 &&
20951              VM_Version::supports_avx512vldq()));
20952   match(Set dst (MulVL src (LoadVector mem)));
20953   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20954   ins_cost(500);
20955   ins_encode %{
20956     assert(UseAVX > 2, "required");
20957     int vlen_enc = vector_length_encoding(this);
20958     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20959   %}
20960   ins_pipe( pipe_slow );
20961 %}
20962 
20963 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20964   predicate(UseAVX == 0);
20965   match(Set dst (MulVL src1 src2));
20966   ins_cost(500);
20967   effect(TEMP dst, TEMP xtmp);
20968   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20969   ins_encode %{
20970     assert(VM_Version::supports_sse4_1(), "required");
20971     // Get the lo-hi products; only the lower 32 bits are of concern
20972     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20973     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20974     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20975     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20976     __ psllq($dst$$XMMRegister, 32);
20977     // Get the lo-lo products
20978     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20979     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20980     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20981   %}
20982   ins_pipe( pipe_slow );
20983 %}
20984 
20985 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20986   predicate(UseAVX > 0 &&
20987             ((Matcher::vector_length_in_bytes(n) == 64 &&
20988               !VM_Version::supports_avx512dq()) ||
20989              (Matcher::vector_length_in_bytes(n) < 64 &&
20990               !VM_Version::supports_avx512vldq())));
20991   match(Set dst (MulVL src1 src2));
20992   effect(TEMP xtmp1, TEMP xtmp2);
20993   ins_cost(500);
20994   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20995   ins_encode %{
20996     int vlen_enc = vector_length_encoding(this);
20997     // Get the lo-hi products; only the lower 32 bits are of concern
20998     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20999     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
21000     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
21001     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
21002     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
21003     // Get the lo-lo products
21004     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21005     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21006   %}
21007   ins_pipe( pipe_slow );
21008 %}
21009 
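// When both inputs are known to be zero-extended (has_uint_inputs) or
// sign-extended (has_int_inputs) 32-bit values, a single vpmuludq/vpmuldq
// already yields the full 64-bit product per lane, hence the lower ins_cost
// relative to the general lowerings above.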
21010 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
21011   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
21012   match(Set dst (MulVL src1 src2));
21013   ins_cost(100);
21014   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
21015   ins_encode %{
21016     int vlen_enc = vector_length_encoding(this);
21017     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21018   %}
21019   ins_pipe( pipe_slow );
21020 %}
21021 
21022 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
21023   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
21024   match(Set dst (MulVL src1 src2));
21025   ins_cost(100);
21026   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
21027   ins_encode %{
21028     int vlen_enc = vector_length_encoding(this);
21029     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21030   %}
21031   ins_pipe( pipe_slow );
21032 %}
21033 
21034 // Floats vector mul
21035 instruct vmulF(vec dst, vec src) %{
21036   predicate(UseAVX == 0);
21037   match(Set dst (MulVF dst src));
21038   format %{ "mulps   $dst,$src\t! mul packedF" %}
21039   ins_encode %{
21040     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
21041   %}
21042   ins_pipe( pipe_slow );
21043 %}
21044 
21045 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
21046   predicate(UseAVX > 0);
21047   match(Set dst (MulVF src1 src2));
21048   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
21049   ins_encode %{
21050     int vlen_enc = vector_length_encoding(this);
21051     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21052   %}
21053   ins_pipe( pipe_slow );
21054 %}
21055 
21056 instruct vmulF_mem(vec dst, vec src, memory mem) %{
21057   predicate((UseAVX > 0) &&
21058             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21059   match(Set dst (MulVF src (LoadVector mem)));
21060   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
21061   ins_encode %{
21062     int vlen_enc = vector_length_encoding(this);
21063     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21064   %}
21065   ins_pipe( pipe_slow );
21066 %}
21067 
21068 // Doubles vector mul
21069 instruct vmulD(vec dst, vec src) %{
21070   predicate(UseAVX == 0);
21071   match(Set dst (MulVD dst src));
21072   format %{ "mulpd   $dst,$src\t! mul packedD" %}
21073   ins_encode %{
21074     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
21075   %}
21076   ins_pipe( pipe_slow );
21077 %}
21078 
21079 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
21080   predicate(UseAVX > 0);
21081   match(Set dst (MulVD src1 src2));
21082   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
21083   ins_encode %{
21084     int vlen_enc = vector_length_encoding(this);
21085     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21086   %}
21087   ins_pipe( pipe_slow );
21088 %}
21089 
21090 instruct vmulD_mem(vec dst, vec src, memory mem) %{
21091   predicate((UseAVX > 0) &&
21092             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21093   match(Set dst (MulVD src (LoadVector mem)));
21094   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
21095   ins_encode %{
21096     int vlen_enc = vector_length_encoding(this);
21097     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21098   %}
21099   ins_pipe( pipe_slow );
21100 %}
21101 
21102 // --------------------------------- DIV --------------------------------------
21103 
21104 // Floats vector div
21105 instruct vdivF(vec dst, vec src) %{
21106   predicate(UseAVX == 0);
21107   match(Set dst (DivVF dst src));
21108   format %{ "divps   $dst,$src\t! div packedF" %}
21109   ins_encode %{
21110     __ divps($dst$$XMMRegister, $src$$XMMRegister);
21111   %}
21112   ins_pipe( pipe_slow );
21113 %}
21114 
21115 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
21116   predicate(UseAVX > 0);
21117   match(Set dst (DivVF src1 src2));
21118   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
21119   ins_encode %{
21120     int vlen_enc = vector_length_encoding(this);
21121     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21122   %}
21123   ins_pipe( pipe_slow );
21124 %}
21125 
21126 instruct vdivF_mem(vec dst, vec src, memory mem) %{
21127   predicate((UseAVX > 0) &&
21128             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21129   match(Set dst (DivVF src (LoadVector mem)));
21130   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
21131   ins_encode %{
21132     int vlen_enc = vector_length_encoding(this);
21133     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21134   %}
21135   ins_pipe( pipe_slow );
21136 %}
21137 
21138 // Doubles vector div
21139 instruct vdivD(vec dst, vec src) %{
21140   predicate(UseAVX == 0);
21141   match(Set dst (DivVD dst src));
21142   format %{ "divpd   $dst,$src\t! div packedD" %}
21143   ins_encode %{
21144     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
21145   %}
21146   ins_pipe( pipe_slow );
21147 %}
21148 
21149 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
21150   predicate(UseAVX > 0);
21151   match(Set dst (DivVD src1 src2));
21152   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
21153   ins_encode %{
21154     int vlen_enc = vector_length_encoding(this);
21155     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21156   %}
21157   ins_pipe( pipe_slow );
21158 %}
21159 
21160 instruct vdivD_mem(vec dst, vec src, memory mem) %{
21161   predicate((UseAVX > 0) &&
21162             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21163   match(Set dst (DivVD src (LoadVector mem)));
21164   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
21165   ins_encode %{
21166     int vlen_enc = vector_length_encoding(this);
21167     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21168   %}
21169   ins_pipe( pipe_slow );
21170 %}
21171 
21172 // ------------------------------ MinMax ---------------------------------------
21173 
21174 // Byte, Short, Int vector Min/Max
21175 instruct minmax_reg_sse(vec dst, vec src) %{
21176   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21177             UseAVX == 0);
21178   match(Set dst (MinV dst src));
21179   match(Set dst (MaxV dst src));
21180   format %{ "vector_minmax  $dst,$src\t!  " %}
21181   ins_encode %{
21182     assert(UseSSE >= 4, "required");
21183 
21184     int opcode = this->ideal_Opcode();
21185     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21186     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21187   %}
21188   ins_pipe( pipe_slow );
21189 %}
21190 
21191 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21192   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21193             UseAVX > 0);
21194   match(Set dst (MinV src1 src2));
21195   match(Set dst (MaxV src1 src2));
21196   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
21197   ins_encode %{
21198     int opcode = this->ideal_Opcode();
21199     int vlen_enc = vector_length_encoding(this);
21200     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21201 
21202     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21203   %}
21204   ins_pipe( pipe_slow );
21205 %}
21206 
21207 // Long vector Min/Max
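// There is no packed signed 64-bit min/max instruction before AVX-512
// (vpminsq/vpmaxsq), so the SSE/AVX forms below compare and blend. The SSE
// form pins xmm0 as the temporary because the SSE4.1 blend instructions take
// their mask implicitly in xmm0.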
21208 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21209   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21210             UseAVX == 0);
21211   match(Set dst (MinV dst src));
21212   match(Set dst (MaxV src dst));
21213   effect(TEMP dst, TEMP tmp);
21214   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
21215   ins_encode %{
21216     assert(UseSSE >= 4, "required");
21217 
21218     int opcode = this->ideal_Opcode();
21219     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21220     assert(elem_bt == T_LONG, "sanity");
21221 
21222     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21223   %}
21224   ins_pipe( pipe_slow );
21225 %}
21226 
21227 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21228   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21229             UseAVX > 0 && !VM_Version::supports_avx512vl());
21230   match(Set dst (MinV src1 src2));
21231   match(Set dst (MaxV src1 src2));
21232   effect(TEMP dst);
21233   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
21234   ins_encode %{
21235     int vlen_enc = vector_length_encoding(this);
21236     int opcode = this->ideal_Opcode();
21237     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21238     assert(elem_bt == T_LONG, "sanity");
21239 
21240     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21241   %}
21242   ins_pipe( pipe_slow );
21243 %}
21244 
21245 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21246   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21247             Matcher::vector_element_basic_type(n) == T_LONG);
21248   match(Set dst (MinV src1 src2));
21249   match(Set dst (MaxV src1 src2));
21250   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
21251   ins_encode %{
21252     assert(UseAVX > 2, "required");
21253 
21254     int vlen_enc = vector_length_encoding(this);
21255     int opcode = this->ideal_Opcode();
21256     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21257     assert(elem_bt == T_LONG, "sanity");
21258 
21259     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21260   %}
21261   ins_pipe( pipe_slow );
21262 %}
21263 
21264 // Float/Double vector Min/Max
21265 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21266   predicate(VM_Version::supports_avx10_2() &&
21267             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21268   match(Set dst (MinV a b));
21269   match(Set dst (MaxV a b));
21270   format %{ "vector_minmaxFP  $dst, $a, $b" %}
21271   ins_encode %{
21272     int vlen_enc = vector_length_encoding(this);
21273     int opcode = this->ideal_Opcode();
21274     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21275     __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21276   %}
21277   ins_pipe( pipe_slow );
21278 %}
21279 
21280 // Float/Double vector Min/Max
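// Java Math.min/max semantics differ from raw minps/maxps: NaN must propagate
// and -0.0 is strictly smaller than +0.0, so on pre-AVX10.2 targets the result
// is assembled with compare/blend sequences that need the extra temporaries
// below.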
21281 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21282   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21283             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21284             UseAVX > 0);
21285   match(Set dst (MinV a b));
21286   match(Set dst (MaxV a b));
21287   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21288   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21289   ins_encode %{
21290     assert(UseAVX > 0, "required");
21291 
21292     int opcode = this->ideal_Opcode();
21293     int vlen_enc = vector_length_encoding(this);
21294     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21295 
21296     __ vminmax_fp(opcode, elem_bt,
21297                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21298                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21299   %}
21300   ins_pipe( pipe_slow );
21301 %}
21302 
21303 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21304   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21305             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21306   match(Set dst (MinV a b));
21307   match(Set dst (MaxV a b));
21308   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21309   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21310   ins_encode %{
21311     assert(UseAVX > 2, "required");
21312 
21313     int opcode = this->ideal_Opcode();
21314     int vlen_enc = vector_length_encoding(this);
21315     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21316 
21317     __ evminmax_fp(opcode, elem_bt,
21318                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21319                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21320   %}
21321   ins_pipe( pipe_slow );
21322 %}
21323 
21324 // ------------------------------ Unsigned vector Min/Max ----------------------
21325 
21326 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21327   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21328   match(Set dst (UMinV a b));
21329   match(Set dst (UMaxV a b));
21330   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21331   ins_encode %{
21332     int opcode = this->ideal_Opcode();
21333     int vlen_enc = vector_length_encoding(this);
21334     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21335     assert(is_integral_type(elem_bt), "");
21336     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21337   %}
21338   ins_pipe( pipe_slow );
21339 %}
21340 
21341 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21342   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21343   match(Set dst (UMinV a (LoadVector b)));
21344   match(Set dst (UMaxV a (LoadVector b)));
21345   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21346   ins_encode %{
21347     int opcode = this->ideal_Opcode();
21348     int vlen_enc = vector_length_encoding(this);
21349     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21350     assert(is_integral_type(elem_bt), "");
21351     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21352   %}
21353   ins_pipe( pipe_slow );
21354 %}
21355 
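// Without AVX512VL there is no packed unsigned 64-bit min/max instruction for
// sub-512-bit vectors, so the macro-assembler routine below emulates it
// (compare-and-blend style), which is why it needs the two temporaries.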
21356 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21357   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21358   match(Set dst (UMinV a b));
21359   match(Set dst (UMaxV a b));
21360   effect(TEMP xtmp1, TEMP xtmp2);
21361   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21362   ins_encode %{
21363     int opcode = this->ideal_Opcode();
21364     int vlen_enc = vector_length_encoding(this);
21365     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21366   %}
21367   ins_pipe( pipe_slow );
21368 %}
21369 
21370 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21371   match(Set dst (UMinV (Binary dst src2) mask));
21372   match(Set dst (UMaxV (Binary dst src2) mask));
21373   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21374   ins_encode %{
21375     int vlen_enc = vector_length_encoding(this);
21376     BasicType bt = Matcher::vector_element_basic_type(this);
21377     int opc = this->ideal_Opcode();
21378     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21379                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21380   %}
21381   ins_pipe( pipe_slow );
21382 %}
21383 
21384 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21385   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21386   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21387   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21388   ins_encode %{
21389     int vlen_enc = vector_length_encoding(this);
21390     BasicType bt = Matcher::vector_element_basic_type(this);
21391     int opc = this->ideal_Opcode();
21392     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21393                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21394   %}
21395   ins_pipe( pipe_slow );
21396 %}
21397 
21398 // --------------------------------- Signum/CopySign ---------------------------
21399 
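// Math.signum returns 1.0 for positive inputs, -1.0 for negative inputs, and
// leaves zeroes (including -0.0) and NaN unchanged; the $zero/$one operands
// supply the constant results that the signum_fp/vector_signum_* routines
// blend in.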
21400 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21401   match(Set dst (SignumF dst (Binary zero one)));
21402   effect(KILL cr);
21403   format %{ "signumF $dst, $dst" %}
21404   ins_encode %{
21405     int opcode = this->ideal_Opcode();
21406     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21407   %}
21408   ins_pipe( pipe_slow );
21409 %}
21410 
21411 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21412   match(Set dst (SignumD dst (Binary zero one)));
21413   effect(KILL cr);
21414   format %{ "signumD $dst, $dst" %}
21415   ins_encode %{
21416     int opcode = this->ideal_Opcode();
21417     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21418   %}
21419   ins_pipe( pipe_slow );
21420 %}
21421 
21422 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21423   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21424   match(Set dst (SignumVF src (Binary zero one)));
21425   match(Set dst (SignumVD src (Binary zero one)));
21426   effect(TEMP dst, TEMP xtmp1);
21427   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21428   ins_encode %{
21429     int opcode = this->ideal_Opcode();
21430     int vec_enc = vector_length_encoding(this);
21431     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21432                          $xtmp1$$XMMRegister, vec_enc);
21433   %}
21434   ins_pipe( pipe_slow );
21435 %}
21436 
21437 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21438   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21439   match(Set dst (SignumVF src (Binary zero one)));
21440   match(Set dst (SignumVD src (Binary zero one)));
21441   effect(TEMP dst, TEMP ktmp1);
21442   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21443   ins_encode %{
21444     int opcode = this->ideal_Opcode();
21445     int vec_enc = vector_length_encoding(this);
21446     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21447                           $ktmp1$$KRegister, vec_enc);
21448   %}
21449   ins_pipe( pipe_slow );
21450 %}
21451 
21452 // ---------------------------------------
21453 // For copySign use 0xE4 as writemask for vpternlog
21454 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21455 // C (xmm2) is set to 0x7FFFFFFF
21456 // Wherever xmm2 is 0, we want to pick the bit from B (the sign operand)
21457 // Wherever xmm2 is 1, we want to pick the bit from A (the magnitude operand)
21458 //
21459 // A B C Result
21460 // 0 0 0 0
21461 // 0 0 1 0
21462 // 0 1 0 1
21463 // 0 1 1 0
21464 // 1 0 0 0
21465 // 1 0 1 1
21466 // 1 1 0 1
21467 // 1 1 1 1
21468 //
21469 // Result going from high bit (A,B,C = 1,1,1) to low bit (A,B,C = 0,0,0) is 0b11100100 = 0xE4
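// (vpternlog forms each destination bit by using the three source bits as an
//  index, (A<<2)|(B<<1)|C, into the immediate byte, which is why the Result
//  column above maps directly onto imm8 bits 7..0.)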
21470 // ---------------------------------------
21471 
21472 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21473   match(Set dst (CopySignF dst src));
21474   effect(TEMP tmp1, TEMP tmp2);
21475   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21476   ins_encode %{
21477     __ movl($tmp2$$Register, 0x7FFFFFFF);
21478     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21479     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21480   %}
21481   ins_pipe( pipe_slow );
21482 %}
21483 
21484 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21485   match(Set dst (CopySignD dst (Binary src zero)));
21486   ins_cost(100);
21487   effect(TEMP tmp1, TEMP tmp2);
21488   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21489   ins_encode %{
21490     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21491     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21492     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21493   %}
21494   ins_pipe( pipe_slow );
21495 %}
21496 
21497 //----------------------------- CompressBits/ExpandBits ------------------------
21498 
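// CompressBits/ExpandBits map directly to the BMI2 PEXT/PDEP instructions.
// Worked example (illustrative values only):
//   pext(src = 0b101100, mask = 0b111000) gathers the bits of src selected by
//   mask into the low bits               -> 0b101
//   pdep(src = 0b101,    mask = 0b111000) scatters the low bits of src into
//   the set-bit positions of mask        -> 0b101000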
21499 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21500   predicate(n->bottom_type()->isa_int());
21501   match(Set dst (CompressBits src mask));
21502   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21503   ins_encode %{
21504     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21505   %}
21506   ins_pipe( pipe_slow );
21507 %}
21508 
21509 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21510   predicate(n->bottom_type()->isa_int());
21511   match(Set dst (ExpandBits src mask));
21512   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21513   ins_encode %{
21514     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21515   %}
21516   ins_pipe( pipe_slow );
21517 %}
21518 
21519 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21520   predicate(n->bottom_type()->isa_int());
21521   match(Set dst (CompressBits src (LoadI mask)));
21522   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21523   ins_encode %{
21524     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21525   %}
21526   ins_pipe( pipe_slow );
21527 %}
21528 
21529 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21530   predicate(n->bottom_type()->isa_int());
21531   match(Set dst (ExpandBits src (LoadI mask)));
21532   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21533   ins_encode %{
21534     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21535   %}
21536   ins_pipe( pipe_slow );
21537 %}
21538 
21539 // --------------------------------- Sqrt --------------------------------------
21540 
21541 instruct vsqrtF_reg(vec dst, vec src) %{
21542   match(Set dst (SqrtVF src));
21543   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21544   ins_encode %{
21545     assert(UseAVX > 0, "required");
21546     int vlen_enc = vector_length_encoding(this);
21547     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21548   %}
21549   ins_pipe( pipe_slow );
21550 %}
21551 
21552 instruct vsqrtF_mem(vec dst, memory mem) %{
21553   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21554   match(Set dst (SqrtVF (LoadVector mem)));
21555   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21556   ins_encode %{
21557     assert(UseAVX > 0, "required");
21558     int vlen_enc = vector_length_encoding(this);
21559     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21560   %}
21561   ins_pipe( pipe_slow );
21562 %}
21563 
21564 // Doubles vector sqrt
21565 instruct vsqrtD_reg(vec dst, vec src) %{
21566   match(Set dst (SqrtVD src));
21567   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21568   ins_encode %{
21569     assert(UseAVX > 0, "required");
21570     int vlen_enc = vector_length_encoding(this);
21571     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21572   %}
21573   ins_pipe( pipe_slow );
21574 %}
21575 
21576 instruct vsqrtD_mem(vec dst, memory mem) %{
21577   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21578   match(Set dst (SqrtVD (LoadVector mem)));
21579   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21580   ins_encode %{
21581     assert(UseAVX > 0, "required");
21582     int vlen_enc = vector_length_encoding(this);
21583     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21584   %}
21585   ins_pipe( pipe_slow );
21586 %}
21587 
21588 // ------------------------------ Shift ---------------------------------------
21589 
21590 // Left and right shift count vectors are the same on x86
21591 // (only lowest bits of xmm reg are used for count).
21592 instruct vshiftcnt(vec dst, rRegI cnt) %{
21593   match(Set dst (LShiftCntV cnt));
21594   match(Set dst (RShiftCntV cnt));
21595   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21596   ins_encode %{
21597     __ movdl($dst$$XMMRegister, $cnt$$Register);
21598   %}
21599   ins_pipe( pipe_slow );
21600 %}
21601 
21602 // Byte vector shift
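// x86 has no per-byte shift instructions, so the byte shifts below widen the
// bytes to 16-bit lanes (vextendbw), shift them as words, mask the results
// back to 8 bits with vector_short_to_byte_mask and re-pack with packuswb.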
21603 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21604   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21605   match(Set dst ( LShiftVB src shift));
21606   match(Set dst ( RShiftVB src shift));
21607   match(Set dst (URShiftVB src shift));
21608   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21609   format %{"vector_byte_shift $dst,$src,$shift" %}
21610   ins_encode %{
21611     assert(UseSSE > 3, "required");
21612     int opcode = this->ideal_Opcode();
21613     bool sign = (opcode != Op_URShiftVB);
21614     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21615     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21616     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21617     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21618     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21619   %}
21620   ins_pipe( pipe_slow );
21621 %}
21622 
21623 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21624   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21625             UseAVX <= 1);
21626   match(Set dst ( LShiftVB src shift));
21627   match(Set dst ( RShiftVB src shift));
21628   match(Set dst (URShiftVB src shift));
21629   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21630   format %{"vector_byte_shift $dst,$src,$shift" %}
21631   ins_encode %{
21632     assert(UseSSE > 3, "required");
21633     int opcode = this->ideal_Opcode();
21634     bool sign = (opcode != Op_URShiftVB);
21635     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21636     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21637     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21638     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21639     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21640     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21641     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21642     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21643     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21644   %}
21645   ins_pipe( pipe_slow );
21646 %}
21647 
21648 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21649   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21650             UseAVX > 1);
21651   match(Set dst ( LShiftVB src shift));
21652   match(Set dst ( RShiftVB src shift));
21653   match(Set dst (URShiftVB src shift));
21654   effect(TEMP dst, TEMP tmp);
21655   format %{"vector_byte_shift $dst,$src,$shift" %}
21656   ins_encode %{
21657     int opcode = this->ideal_Opcode();
21658     bool sign = (opcode != Op_URShiftVB);
21659     int vlen_enc = Assembler::AVX_256bit;
21660     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21661     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21662     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21663     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21664     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21665   %}
21666   ins_pipe( pipe_slow );
21667 %}
21668 
21669 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21670   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21671   match(Set dst ( LShiftVB src shift));
21672   match(Set dst ( RShiftVB src shift));
21673   match(Set dst (URShiftVB src shift));
21674   effect(TEMP dst, TEMP tmp);
21675   format %{"vector_byte_shift $dst,$src,$shift" %}
21676   ins_encode %{
21677     assert(UseAVX > 1, "required");
21678     int opcode = this->ideal_Opcode();
21679     bool sign = (opcode != Op_URShiftVB);
21680     int vlen_enc = Assembler::AVX_256bit;
21681     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21682     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21683     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21684     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21685     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21686     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21687     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21688     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21689     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21690   %}
21691   ins_pipe( pipe_slow );
21692 %}
21693 
21694 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21695   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21696   match(Set dst ( LShiftVB src shift));
21697   match(Set dst  (RShiftVB src shift));
21698   match(Set dst (URShiftVB src shift));
21699   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21700   format %{"vector_byte_shift $dst,$src,$shift" %}
21701   ins_encode %{
21702     assert(UseAVX > 2, "required");
21703     int opcode = this->ideal_Opcode();
21704     bool sign = (opcode != Op_URShiftVB);
21705     int vlen_enc = Assembler::AVX_512bit;
21706     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21707     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21708     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21709     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21710     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21711     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21712     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21713     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21714     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21715     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21716     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21717     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21718   %}
21719   ins_pipe( pipe_slow );
21720 %}
21721 
21722 // Shorts vector logical right shift produces an incorrect Java result
21723 // for negative data because Java code converts a short value into an int
21724 // with sign extension before shifting. But char vectors are fine since
21725 // chars are unsigned values.
21726 // Shorts/Chars vector shift
21727 instruct vshiftS(vec dst, vec src, vec shift) %{
21728   predicate(!n->as_ShiftV()->is_var_shift());
21729   match(Set dst ( LShiftVS src shift));
21730   match(Set dst ( RShiftVS src shift));
21731   match(Set dst (URShiftVS src shift));
21732   effect(TEMP dst, USE src, USE shift);
21733   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21734   ins_encode %{
21735     int opcode = this->ideal_Opcode();
21736     if (UseAVX > 0) {
21737       int vlen_enc = vector_length_encoding(this);
21738       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21739     } else {
21740       int vlen = Matcher::vector_length(this);
21741       if (vlen == 2) {
21742         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21743         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21744       } else if (vlen == 4) {
21745         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21746         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21747       } else {
21748         assert (vlen == 8, "sanity");
21749         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21750         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21751       }
21752     }
21753   %}
21754   ins_pipe( pipe_slow );
21755 %}
21756 
21757 // Integers vector shift
21758 instruct vshiftI(vec dst, vec src, vec shift) %{
21759   predicate(!n->as_ShiftV()->is_var_shift());
21760   match(Set dst ( LShiftVI src shift));
21761   match(Set dst ( RShiftVI src shift));
21762   match(Set dst (URShiftVI src shift));
21763   effect(TEMP dst, USE src, USE shift);
21764   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21765   ins_encode %{
21766     int opcode = this->ideal_Opcode();
21767     if (UseAVX > 0) {
21768       int vlen_enc = vector_length_encoding(this);
21769       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21770     } else {
21771       int vlen = Matcher::vector_length(this);
21772       if (vlen == 2) {
21773         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21774         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21775       } else {
21776         assert(vlen == 4, "sanity");
21777         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21778         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21779       }
21780     }
21781   %}
21782   ins_pipe( pipe_slow );
21783 %}
21784 
21785 // Integers vector constant shift
21786 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21787   match(Set dst (LShiftVI src (LShiftCntV shift)));
21788   match(Set dst (RShiftVI src (RShiftCntV shift)));
21789   match(Set dst (URShiftVI src (RShiftCntV shift)));
21790   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21791   ins_encode %{
21792     int opcode = this->ideal_Opcode();
21793     if (UseAVX > 0) {
21794       int vector_len = vector_length_encoding(this);
21795       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21796     } else {
21797       int vlen = Matcher::vector_length(this);
21798       if (vlen == 2) {
21799         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21800         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21801       } else {
21802         assert(vlen == 4, "sanity");
21803         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21804         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21805       }
21806     }
21807   %}
21808   ins_pipe( pipe_slow );
21809 %}
21810 
21811 // Longs vector shift
21812 instruct vshiftL(vec dst, vec src, vec shift) %{
21813   predicate(!n->as_ShiftV()->is_var_shift());
21814   match(Set dst ( LShiftVL src shift));
21815   match(Set dst (URShiftVL src shift));
21816   effect(TEMP dst, USE src, USE shift);
21817   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21818   ins_encode %{
21819     int opcode = this->ideal_Opcode();
21820     if (UseAVX > 0) {
21821       int vlen_enc = vector_length_encoding(this);
21822       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21823     } else {
21824       assert(Matcher::vector_length(this) == 2, "");
21825       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21826       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21827     }
21828   %}
21829   ins_pipe( pipe_slow );
21830 %}
21831 
21832 // Longs vector constant shift
21833 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21834   match(Set dst (LShiftVL src (LShiftCntV shift)));
21835   match(Set dst (URShiftVL src (RShiftCntV shift)));
21836   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21837   ins_encode %{
21838     int opcode = this->ideal_Opcode();
21839     if (UseAVX > 0) {
21840       int vector_len = vector_length_encoding(this);
21841       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21842     } else {
21843       assert(Matcher::vector_length(this) == 2, "");
21844       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21845       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21846     }
21847   %}
21848   ins_pipe( pipe_slow );
21849 %}
21850 
21851 // -------------------ArithmeticRightShift -----------------------------------
21852 // Long vector arithmetic right shift
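// Before AVX-512 there is no packed 64-bit arithmetic right shift (evpsraq),
// so it is emulated with a logical shift plus a sign fix-up. Sketch per lane
// (illustrative only):
//   t = 0x8000000000000000 >>> s;   // shifted sign mask
//   result = ((x >>> s) ^ t) - t;   // re-extends the original sign bit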
21853 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21854   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21855   match(Set dst (RShiftVL src shift));
21856   effect(TEMP dst, TEMP tmp);
21857   format %{ "vshiftq $dst,$src,$shift" %}
21858   ins_encode %{
21859     uint vlen = Matcher::vector_length(this);
21860     if (vlen == 2) {
21861       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21862       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21863       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21864       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21865       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21866       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21867     } else {
21868       assert(vlen == 4, "sanity");
21869       assert(UseAVX > 1, "required");
21870       int vlen_enc = Assembler::AVX_256bit;
21871       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21872       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21873       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21874       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21875       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21876     }
21877   %}
21878   ins_pipe( pipe_slow );
21879 %}
21880 
21881 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21882   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21883   match(Set dst (RShiftVL src shift));
21884   format %{ "vshiftq $dst,$src,$shift" %}
21885   ins_encode %{
21886     int vlen_enc = vector_length_encoding(this);
21887     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21888   %}
21889   ins_pipe( pipe_slow );
21890 %}
21891 
21892 // ------------------- Variable Shift -----------------------------
21893 // Byte variable shift
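// There is no per-element variable shift for byte lanes on x86; the lowerings
// below widen bytes to words (varshiftbw), apply a variable word shift and
// pack the word results back down to bytes.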
21894 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21895   predicate(Matcher::vector_length(n) <= 8 &&
21896             n->as_ShiftV()->is_var_shift() &&
21897             !VM_Version::supports_avx512bw());
21898   match(Set dst ( LShiftVB src shift));
21899   match(Set dst ( RShiftVB src shift));
21900   match(Set dst (URShiftVB src shift));
21901   effect(TEMP dst, TEMP vtmp);
21902   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21903   ins_encode %{
21904     assert(UseAVX >= 2, "required");
21905 
21906     int opcode = this->ideal_Opcode();
21907     int vlen_enc = Assembler::AVX_128bit;
21908     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21909     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21910   %}
21911   ins_pipe( pipe_slow );
21912 %}
21913 
21914 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21915   predicate(Matcher::vector_length(n) == 16 &&
21916             n->as_ShiftV()->is_var_shift() &&
21917             !VM_Version::supports_avx512bw());
21918   match(Set dst ( LShiftVB src shift));
21919   match(Set dst ( RShiftVB src shift));
21920   match(Set dst (URShiftVB src shift));
21921   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21922   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21923   ins_encode %{
21924     assert(UseAVX >= 2, "required");
21925 
21926     int opcode = this->ideal_Opcode();
21927     int vlen_enc = Assembler::AVX_128bit;
21928     // Shift lower half and get word result in dst
21929     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21930 
21931     // Shift upper half and get word result in vtmp1
21932     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21933     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21934     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21935 
21936     // Merge and down convert the two word results to byte in dst
21937     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21938   %}
21939   ins_pipe( pipe_slow );
21940 %}
21941 
21942 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21943   predicate(Matcher::vector_length(n) == 32 &&
21944             n->as_ShiftV()->is_var_shift() &&
21945             !VM_Version::supports_avx512bw());
21946   match(Set dst ( LShiftVB src shift));
21947   match(Set dst ( RShiftVB src shift));
21948   match(Set dst (URShiftVB src shift));
21949   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21950   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21951   ins_encode %{
21952     assert(UseAVX >= 2, "required");
21953 
21954     int opcode = this->ideal_Opcode();
21955     int vlen_enc = Assembler::AVX_128bit;
21956     // Process lower 128 bits and get result in dst
21957     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21958     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21959     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21960     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21961     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21962 
21963     // Process higher 128 bits and get result in vtmp3
21964     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21965     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21966     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21967     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21968     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21969     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21970     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21971 
21972     // Merge the two results in dst
21973     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21974   %}
21975   ins_pipe( pipe_slow );
21976 %}
21977 
21978 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21979   predicate(Matcher::vector_length(n) <= 32 &&
21980             n->as_ShiftV()->is_var_shift() &&
21981             VM_Version::supports_avx512bw());
21982   match(Set dst ( LShiftVB src shift));
21983   match(Set dst ( RShiftVB src shift));
21984   match(Set dst (URShiftVB src shift));
21985   effect(TEMP dst, TEMP vtmp);
21986   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21987   ins_encode %{
21988     assert(UseAVX > 2, "required");
21989 
21990     int opcode = this->ideal_Opcode();
21991     int vlen_enc = vector_length_encoding(this);
21992     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21993   %}
21994   ins_pipe( pipe_slow );
21995 %}
21996 
21997 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21998   predicate(Matcher::vector_length(n) == 64 &&
21999             n->as_ShiftV()->is_var_shift() &&
22000             VM_Version::supports_avx512bw());
22001   match(Set dst ( LShiftVB src shift));
22002   match(Set dst ( RShiftVB src shift));
22003   match(Set dst (URShiftVB src shift));
22004   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22005   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
22006   ins_encode %{
22007     assert(UseAVX > 2, "required");
22008 
22009     int opcode = this->ideal_Opcode();
22010     int vlen_enc = Assembler::AVX_256bit;
22011     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
22012     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
22013     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
22014     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
22015     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
22016   %}
22017   ins_pipe( pipe_slow );
22018 %}
22019 
22020 // Short variable shift
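// Per-element variable shifts for 16-bit lanes (vpsllvw/vpsrlvw/vpsravw)
// require AVX512BW. The _nobw forms below widen shorts and shift counts to
// 32-bit lanes, use the dword variable-shift instructions, mask the results
// back to 16 bits and re-pack them.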
22021 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
22022   predicate(Matcher::vector_length(n) <= 8 &&
22023             n->as_ShiftV()->is_var_shift() &&
22024             !VM_Version::supports_avx512bw());
22025   match(Set dst ( LShiftVS src shift));
22026   match(Set dst ( RShiftVS src shift));
22027   match(Set dst (URShiftVS src shift));
22028   effect(TEMP dst, TEMP vtmp);
22029   format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp as TEMP" %}
22030   ins_encode %{
22031     assert(UseAVX >= 2, "required");
22032 
22033     int opcode = this->ideal_Opcode();
22034     bool sign = (opcode != Op_URShiftVS);
22035     int vlen_enc = Assembler::AVX_256bit;
22036     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
22037     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
22038     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22039     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22040     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
22041     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22042   %}
22043   ins_pipe( pipe_slow );
22044 %}
22045 
22046 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
22047   predicate(Matcher::vector_length(n) == 16 &&
22048             n->as_ShiftV()->is_var_shift() &&
22049             !VM_Version::supports_avx512bw());
22050   match(Set dst ( LShiftVS src shift));
22051   match(Set dst ( RShiftVS src shift));
22052   match(Set dst (URShiftVS src shift));
22053   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22054   format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp1, $vtmp2 as TEMP" %}
22055   ins_encode %{
22056     assert(UseAVX >= 2, "required");
22057 
22058     int opcode = this->ideal_Opcode();
22059     bool sign = (opcode != Op_URShiftVS);
22060     int vlen_enc = Assembler::AVX_256bit;
22061     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
22062     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
22063     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22064     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22065     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22066 
22067     // Shift upper half, with result in dst using vtmp1 as TEMP
22068     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
22069     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
22070     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22071     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22072     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22073     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22074 
22075     // Merge lower and upper half result into dst
22076     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22077     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
22078   %}
22079   ins_pipe( pipe_slow );
22080 %}
22081 
22082 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
22083   predicate(n->as_ShiftV()->is_var_shift() &&
22084             VM_Version::supports_avx512bw());
22085   match(Set dst ( LShiftVS src shift));
22086   match(Set dst ( RShiftVS src shift));
22087   match(Set dst (URShiftVS src shift));
22088   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
22089   ins_encode %{
22090     assert(UseAVX > 2, "required");
22091 
22092     int opcode = this->ideal_Opcode();
22093     int vlen_enc = vector_length_encoding(this);
22094     if (!VM_Version::supports_avx512vl()) {
22095       vlen_enc = Assembler::AVX_512bit;
22096     }
22097     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22098   %}
22099   ins_pipe( pipe_slow );
22100 %}
22101 
// Integer variable shift
22103 instruct vshiftI_var(vec dst, vec src, vec shift) %{
22104   predicate(n->as_ShiftV()->is_var_shift());
22105   match(Set dst ( LShiftVI src shift));
22106   match(Set dst ( RShiftVI src shift));
22107   match(Set dst (URShiftVI src shift));
22108   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
22109   ins_encode %{
22110     assert(UseAVX >= 2, "required");
22111 
22112     int opcode = this->ideal_Opcode();
22113     int vlen_enc = vector_length_encoding(this);
22114     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22115   %}
22116   ins_pipe( pipe_slow );
22117 %}
22118 
// Long variable shift
22120 instruct vshiftL_var(vec dst, vec src, vec shift) %{
22121   predicate(n->as_ShiftV()->is_var_shift());
22122   match(Set dst ( LShiftVL src shift));
22123   match(Set dst (URShiftVL src shift));
22124   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22125   ins_encode %{
22126     assert(UseAVX >= 2, "required");
22127 
22128     int opcode = this->ideal_Opcode();
22129     int vlen_enc = vector_length_encoding(this);
22130     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22131   %}
22132   ins_pipe( pipe_slow );
22133 %}
22134 
// Long variable arithmetic right shift
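// AVX2 has no variable arithmetic 64-bit right shift (vpsravq is AVX-512 only), so the
// macro-assembler helper below emulates it and needs an extra temporary; with UseAVX > 2
// the shift maps to a single instruction.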
22136 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
22137   predicate(Matcher::vector_length(n) <= 4 &&
22138             n->as_ShiftV()->is_var_shift() &&
22139             UseAVX == 2);
22140   match(Set dst (RShiftVL src shift));
22141   effect(TEMP dst, TEMP vtmp);
22142   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
22143   ins_encode %{
22144     int opcode = this->ideal_Opcode();
22145     int vlen_enc = vector_length_encoding(this);
22146     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
22147                  $vtmp$$XMMRegister);
22148   %}
22149   ins_pipe( pipe_slow );
22150 %}
22151 
22152 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
22153   predicate(n->as_ShiftV()->is_var_shift() &&
22154             UseAVX > 2);
22155   match(Set dst (RShiftVL src shift));
22156   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
22157   ins_encode %{
22158     int opcode = this->ideal_Opcode();
22159     int vlen_enc = vector_length_encoding(this);
22160     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22161   %}
22162   ins_pipe( pipe_slow );
22163 %}
22164 
22165 // --------------------------------- AND --------------------------------------
22166 
22167 instruct vand(vec dst, vec src) %{
22168   predicate(UseAVX == 0);
22169   match(Set dst (AndV dst src));
22170   format %{ "pand    $dst,$src\t! and vectors" %}
22171   ins_encode %{
22172     __ pand($dst$$XMMRegister, $src$$XMMRegister);
22173   %}
22174   ins_pipe( pipe_slow );
22175 %}
22176 
22177 instruct vand_reg(vec dst, vec src1, vec src2) %{
22178   predicate(UseAVX > 0);
22179   match(Set dst (AndV src1 src2));
22180   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
22181   ins_encode %{
22182     int vlen_enc = vector_length_encoding(this);
22183     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22184   %}
22185   ins_pipe( pipe_slow );
22186 %}
22187 
22188 instruct vand_mem(vec dst, vec src, memory mem) %{
22189   predicate((UseAVX > 0) &&
22190             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22191   match(Set dst (AndV src (LoadVector mem)));
22192   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
22193   ins_encode %{
22194     int vlen_enc = vector_length_encoding(this);
22195     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22196   %}
22197   ins_pipe( pipe_slow );
22198 %}
22199 
22200 // --------------------------------- OR ---------------------------------------
22201 
22202 instruct vor(vec dst, vec src) %{
22203   predicate(UseAVX == 0);
22204   match(Set dst (OrV dst src));
22205   format %{ "por     $dst,$src\t! or vectors" %}
22206   ins_encode %{
22207     __ por($dst$$XMMRegister, $src$$XMMRegister);
22208   %}
22209   ins_pipe( pipe_slow );
22210 %}
22211 
22212 instruct vor_reg(vec dst, vec src1, vec src2) %{
22213   predicate(UseAVX > 0);
22214   match(Set dst (OrV src1 src2));
22215   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
22216   ins_encode %{
22217     int vlen_enc = vector_length_encoding(this);
22218     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22219   %}
22220   ins_pipe( pipe_slow );
22221 %}
22222 
22223 instruct vor_mem(vec dst, vec src, memory mem) %{
22224   predicate((UseAVX > 0) &&
22225             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22226   match(Set dst (OrV src (LoadVector mem)));
22227   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
22228   ins_encode %{
22229     int vlen_enc = vector_length_encoding(this);
22230     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22231   %}
22232   ins_pipe( pipe_slow );
22233 %}
22234 
22235 // --------------------------------- XOR --------------------------------------
22236 
22237 instruct vxor(vec dst, vec src) %{
22238   predicate(UseAVX == 0);
22239   match(Set dst (XorV dst src));
22240   format %{ "pxor    $dst,$src\t! xor vectors" %}
22241   ins_encode %{
22242     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22243   %}
22244   ins_pipe( pipe_slow );
22245 %}
22246 
22247 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22248   predicate(UseAVX > 0);
22249   match(Set dst (XorV src1 src2));
22250   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
22251   ins_encode %{
22252     int vlen_enc = vector_length_encoding(this);
22253     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22254   %}
22255   ins_pipe( pipe_slow );
22256 %}
22257 
22258 instruct vxor_mem(vec dst, vec src, memory mem) %{
22259   predicate((UseAVX > 0) &&
22260             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22261   match(Set dst (XorV src (LoadVector mem)));
22262   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
22263   ins_encode %{
22264     int vlen_enc = vector_length_encoding(this);
22265     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22266   %}
22267   ins_pipe( pipe_slow );
22268 %}
22269 
22270 // --------------------------------- VectorCast --------------------------------------
22271 
22272 instruct vcastBtoX(vec dst, vec src) %{
22273   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22274   match(Set dst (VectorCastB2X src));
22275   format %{ "vector_cast_b2x $dst,$src\t!" %}
22276   ins_encode %{
22277     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22278     int vlen_enc = vector_length_encoding(this);
22279     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22280   %}
22281   ins_pipe( pipe_slow );
22282 %}
22283 
22284 instruct vcastBtoD(legVec dst, legVec src) %{
22285   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22286   match(Set dst (VectorCastB2X src));
22287   format %{ "vector_cast_b2x $dst,$src\t!" %}
22288   ins_encode %{
22289     int vlen_enc = vector_length_encoding(this);
22290     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22291   %}
22292   ins_pipe( pipe_slow );
22293 %}
22294 
22295 instruct castStoX(vec dst, vec src) %{
22296   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22297             Matcher::vector_length(n->in(1)) <= 8 && // src
22298             Matcher::vector_element_basic_type(n) == T_BYTE);
22299   match(Set dst (VectorCastS2X src));
22300   format %{ "vector_cast_s2x $dst,$src" %}
22301   ins_encode %{
22302     assert(UseAVX > 0, "required");
22303 
22304     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22305     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22306   %}
22307   ins_pipe( pipe_slow );
22308 %}
22309 
22310 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22311   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22312             Matcher::vector_length(n->in(1)) == 16 && // src
22313             Matcher::vector_element_basic_type(n) == T_BYTE);
22314   effect(TEMP dst, TEMP vtmp);
22315   match(Set dst (VectorCastS2X src));
22316   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22317   ins_encode %{
22318     assert(UseAVX > 0, "required");
22319 
22320     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22321     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22322     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22323     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22324   %}
22325   ins_pipe( pipe_slow );
22326 %}
22327 
22328 instruct vcastStoX_evex(vec dst, vec src) %{
22329   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22330             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22331   match(Set dst (VectorCastS2X src));
22332   format %{ "vector_cast_s2x $dst,$src\t!" %}
22333   ins_encode %{
22334     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22335     int src_vlen_enc = vector_length_encoding(this, $src);
22336     int vlen_enc = vector_length_encoding(this);
22337     switch (to_elem_bt) {
22338       case T_BYTE:
22339         if (!VM_Version::supports_avx512vl()) {
22340           vlen_enc = Assembler::AVX_512bit;
22341         }
22342         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22343         break;
22344       case T_INT:
22345         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22346         break;
22347       case T_FLOAT:
22348         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22349         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22350         break;
22351       case T_LONG:
22352         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22353         break;
22354       case T_DOUBLE: {
22355         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22356         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22357         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22358         break;
22359       }
22360       default:
22361         ShouldNotReachHere();
22362     }
22363   %}
22364   ins_pipe( pipe_slow );
22365 %}
22366 
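// Narrowing int casts on AVX <= 2: the upper bits of each lane are masked off first so the
// unsigned pack instructions (vpackusdw/vpackuswb) cannot saturate, then the lanes are packed
// down to short or byte width.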
22367 instruct castItoX(vec dst, vec src) %{
22368   predicate(UseAVX <= 2 &&
22369             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22370             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22371   match(Set dst (VectorCastI2X src));
22372   format %{ "vector_cast_i2x $dst,$src" %}
22373   ins_encode %{
22374     assert(UseAVX > 0, "required");
22375 
22376     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22377     int vlen_enc = vector_length_encoding(this, $src);
22378 
22379     if (to_elem_bt == T_BYTE) {
22380       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22381       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22382       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22383     } else {
22384       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22385       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22386       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22387     }
22388   %}
22389   ins_pipe( pipe_slow );
22390 %}
22391 
22392 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22393   predicate(UseAVX <= 2 &&
22394             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22395             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22396   match(Set dst (VectorCastI2X src));
22397   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22398   effect(TEMP dst, TEMP vtmp);
22399   ins_encode %{
22400     assert(UseAVX > 0, "required");
22401 
22402     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22403     int vlen_enc = vector_length_encoding(this, $src);
22404 
22405     if (to_elem_bt == T_BYTE) {
22406       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22407       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22408       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22409       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22410     } else {
22411       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22412       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22413       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22414       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22415     }
22416   %}
22417   ins_pipe( pipe_slow );
22418 %}
22419 
22420 instruct vcastItoX_evex(vec dst, vec src) %{
22421   predicate(UseAVX > 2 ||
22422             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22423   match(Set dst (VectorCastI2X src));
22424   format %{ "vector_cast_i2x $dst,$src\t!" %}
22425   ins_encode %{
22426     assert(UseAVX > 0, "required");
22427 
22428     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22429     int src_vlen_enc = vector_length_encoding(this, $src);
22430     int dst_vlen_enc = vector_length_encoding(this);
22431     switch (dst_elem_bt) {
22432       case T_BYTE:
22433         if (!VM_Version::supports_avx512vl()) {
22434           src_vlen_enc = Assembler::AVX_512bit;
22435         }
22436         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22437         break;
22438       case T_SHORT:
22439         if (!VM_Version::supports_avx512vl()) {
22440           src_vlen_enc = Assembler::AVX_512bit;
22441         }
22442         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22443         break;
22444       case T_FLOAT:
22445         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22446         break;
22447       case T_LONG:
22448         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22449         break;
22450       case T_DOUBLE:
22451         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22452         break;
22453       default:
22454         ShouldNotReachHere();
22455     }
22456   %}
22457   ins_pipe( pipe_slow );
22458 %}
22459 
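// Long -> byte/short narrowing without AVX-512: the low 32-bit halves of the longs are gathered
// with a shuffle, masked with the int -> byte/short masks, and packed down to the target width.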
22460 instruct vcastLtoBS(vec dst, vec src) %{
22461   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22462             UseAVX <= 2);
22463   match(Set dst (VectorCastL2X src));
22464   format %{ "vector_cast_l2x  $dst,$src" %}
22465   ins_encode %{
22466     assert(UseAVX > 0, "required");
22467 
22468     int vlen = Matcher::vector_length_in_bytes(this, $src);
22469     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22470     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22471                                                       : ExternalAddress(vector_int_to_short_mask());
22472     if (vlen <= 16) {
22473       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22474       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22475       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22476     } else {
22477       assert(vlen <= 32, "required");
22478       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22479       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22480       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22481       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22482     }
22483     if (to_elem_bt == T_BYTE) {
22484       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22485     }
22486   %}
22487   ins_pipe( pipe_slow );
22488 %}
22489 
22490 instruct vcastLtoX_evex(vec dst, vec src) %{
22491   predicate(UseAVX > 2 ||
22492             (Matcher::vector_element_basic_type(n) == T_INT ||
22493              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22494              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22495   match(Set dst (VectorCastL2X src));
22496   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22497   ins_encode %{
22498     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22499     int vlen = Matcher::vector_length_in_bytes(this, $src);
22500     int vlen_enc = vector_length_encoding(this, $src);
22501     switch (to_elem_bt) {
22502       case T_BYTE:
22503         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22504           vlen_enc = Assembler::AVX_512bit;
22505         }
22506         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22507         break;
22508       case T_SHORT:
22509         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22510           vlen_enc = Assembler::AVX_512bit;
22511         }
22512         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22513         break;
22514       case T_INT:
22515         if (vlen == 8) {
22516           if ($dst$$XMMRegister != $src$$XMMRegister) {
22517             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22518           }
22519         } else if (vlen == 16) {
22520           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22521         } else if (vlen == 32) {
22522           if (UseAVX > 2) {
22523             if (!VM_Version::supports_avx512vl()) {
22524               vlen_enc = Assembler::AVX_512bit;
22525             }
22526             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22527           } else {
22528             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22529             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22530           }
22531         } else { // vlen == 64
22532           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22533         }
22534         break;
22535       case T_FLOAT:
22536         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22537         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22538         break;
22539       case T_DOUBLE:
22540         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22541         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22542         break;
22543 
22544       default: assert(false, "%s", type2name(to_elem_bt));
22545     }
22546   %}
22547   ins_pipe( pipe_slow );
22548 %}
22549 
22550 instruct vcastFtoD_reg(vec dst, vec src) %{
22551   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22552   match(Set dst (VectorCastF2X src));
22553   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22554   ins_encode %{
22555     int vlen_enc = vector_length_encoding(this);
22556     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22557   %}
22558   ins_pipe( pipe_slow );
22559 %}
22560 
22561 
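// Vector float -> integral casts must follow Java semantics for NaN and out-of-range values, so
// the pre-AVX10.2 rules below need temporaries (and mask registers on EVEX targets) to fix up
// those lanes. AVX10.2 provides saturating conversions, so the *_avx10_2 rules need no fix-up.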
22562 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22563   predicate(!VM_Version::supports_avx10_2() &&
22564             !VM_Version::supports_avx512vl() &&
22565             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22566             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22567             is_integral_type(Matcher::vector_element_basic_type(n)));
22568   match(Set dst (VectorCastF2X src));
22569   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22570   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22571   ins_encode %{
22572     int vlen_enc = vector_length_encoding(this, $src);
22573     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading addresses wider
    // than 32 bits for register-indirect addressing, since stub constants live in the code cache
    // and ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise that limit,
    // but a code cache larger than 2G is unrealistic in practice; with the cap in place we avoid
    // allocating a temporary register, which in the limiting case can prevent spilling in blocks
    // with high register pressure.
22581     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22582                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22583                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22584   %}
22585   ins_pipe( pipe_slow );
22586 %}
22587 
22588 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22589   predicate(!VM_Version::supports_avx10_2() &&
22590             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22591             is_integral_type(Matcher::vector_element_basic_type(n)));
22592   match(Set dst (VectorCastF2X src));
22593   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22594   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22595   ins_encode %{
22596     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22597     if (to_elem_bt == T_LONG) {
22598       int vlen_enc = vector_length_encoding(this);
22599       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22600                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22601                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22602     } else {
22603       int vlen_enc = vector_length_encoding(this, $src);
22604       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22605                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22606                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22607     }
22608   %}
22609   ins_pipe( pipe_slow );
22610 %}
22611 
22612 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22613   predicate(VM_Version::supports_avx10_2() &&
22614             is_integral_type(Matcher::vector_element_basic_type(n)));
22615   match(Set dst (VectorCastF2X src));
22616   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22617   ins_encode %{
22618     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22619     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22620     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22621   %}
22622   ins_pipe( pipe_slow );
22623 %}
22624 
22625 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22626   predicate(VM_Version::supports_avx10_2() &&
22627             is_integral_type(Matcher::vector_element_basic_type(n)));
22628   match(Set dst (VectorCastF2X (LoadVector src)));
22629   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22630   ins_encode %{
22631     int vlen = Matcher::vector_length(this);
22632     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22633     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22634     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22635   %}
22636   ins_pipe( pipe_slow );
22637 %}
22638 
22639 instruct vcastDtoF_reg(vec dst, vec src) %{
22640   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22641   match(Set dst (VectorCastD2X src));
22642   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22643   ins_encode %{
22644     int vlen_enc = vector_length_encoding(this, $src);
22645     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22646   %}
22647   ins_pipe( pipe_slow );
22648 %}
22649 
22650 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22651   predicate(!VM_Version::supports_avx10_2() &&
22652             !VM_Version::supports_avx512vl() &&
22653             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22654             is_integral_type(Matcher::vector_element_basic_type(n)));
22655   match(Set dst (VectorCastD2X src));
22656   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22657   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22658   ins_encode %{
22659     int vlen_enc = vector_length_encoding(this, $src);
22660     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22661     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22662                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22663                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22664   %}
22665   ins_pipe( pipe_slow );
22666 %}
22667 
22668 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22669   predicate(!VM_Version::supports_avx10_2() &&
22670             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22671             is_integral_type(Matcher::vector_element_basic_type(n)));
22672   match(Set dst (VectorCastD2X src));
22673   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22674   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22675   ins_encode %{
22676     int vlen_enc = vector_length_encoding(this, $src);
22677     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22678     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22679                               ExternalAddress(vector_float_signflip());
22680     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22681                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22682   %}
22683   ins_pipe( pipe_slow );
22684 %}
22685 
22686 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22687   predicate(VM_Version::supports_avx10_2() &&
22688             is_integral_type(Matcher::vector_element_basic_type(n)));
22689   match(Set dst (VectorCastD2X src));
22690   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22691   ins_encode %{
22692     int vlen_enc = vector_length_encoding(this, $src);
22693     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22694     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22695   %}
22696   ins_pipe( pipe_slow );
22697 %}
22698 
22699 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22700   predicate(VM_Version::supports_avx10_2() &&
22701             is_integral_type(Matcher::vector_element_basic_type(n)));
22702   match(Set dst (VectorCastD2X (LoadVector src)));
22703   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22704   ins_encode %{
22705     int vlen = Matcher::vector_length(this);
22706     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22707     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22708     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22709   %}
22710   ins_pipe( pipe_slow );
22711 %}
22712 
22713 instruct vucast(vec dst, vec src) %{
22714   match(Set dst (VectorUCastB2X src));
22715   match(Set dst (VectorUCastS2X src));
22716   match(Set dst (VectorUCastI2X src));
22717   format %{ "vector_ucast $dst,$src\t!" %}
22718   ins_encode %{
22719     assert(UseAVX > 0, "required");
22720 
22721     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22722     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22723     int vlen_enc = vector_length_encoding(this);
22724     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22725   %}
22726   ins_pipe( pipe_slow );
22727 %}
22728 
22729 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22730   predicate(!VM_Version::supports_avx512vl() &&
22731             Matcher::vector_length_in_bytes(n) < 64 &&
22732             Matcher::vector_element_basic_type(n) == T_INT);
22733   match(Set dst (RoundVF src));
22734   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22735   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22736   ins_encode %{
22737     int vlen_enc = vector_length_encoding(this);
22738     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22739     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22740                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22741                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22742   %}
22743   ins_pipe( pipe_slow );
22744 %}
22745 
22746 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22747   predicate((VM_Version::supports_avx512vl() ||
22748              Matcher::vector_length_in_bytes(n) == 64) &&
22749              Matcher::vector_element_basic_type(n) == T_INT);
22750   match(Set dst (RoundVF src));
22751   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22752   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22753   ins_encode %{
22754     int vlen_enc = vector_length_encoding(this);
22755     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22756     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22757                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22758                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22759   %}
22760   ins_pipe( pipe_slow );
22761 %}
22762 
22763 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22764   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22765   match(Set dst (RoundVD src));
22766   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22767   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22768   ins_encode %{
22769     int vlen_enc = vector_length_encoding(this);
22770     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22771     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22772                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22773                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22774   %}
22775   ins_pipe( pipe_slow );
22776 %}
22777 
22778 // --------------------------------- VectorMaskCmp --------------------------------------
22779 
22780 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22781   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22782             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22783             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22784             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22785   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22786   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22787   ins_encode %{
22788     int vlen_enc = vector_length_encoding(this, $src1);
22789     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22790     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22791       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22792     } else {
22793       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22794     }
22795   %}
22796   ins_pipe( pipe_slow );
22797 %}
22798 
22799 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22800   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22801             n->bottom_type()->isa_vectmask() == nullptr &&
22802             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22803   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22804   effect(TEMP ktmp);
22805   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22806   ins_encode %{
22807     int vlen_enc = Assembler::AVX_512bit;
22808     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22809     KRegister mask = k0; // The comparison itself is not being masked.
22810     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22811       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22812       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22813     } else {
22814       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22815       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22816     }
22817   %}
22818   ins_pipe( pipe_slow );
22819 %}
22820 
22821 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22822   predicate(n->bottom_type()->isa_vectmask() &&
22823             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22824   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22825   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22826   ins_encode %{
22827     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22828     int vlen_enc = vector_length_encoding(this, $src1);
22829     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22830     KRegister mask = k0; // The comparison itself is not being masked.
22831     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22832       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22833     } else {
22834       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22835     }
22836   %}
22837   ins_pipe( pipe_slow );
22838 %}
22839 
22840 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22841   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22842             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22843             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22844             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22845             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22846             (n->in(2)->get_int() == BoolTest::eq ||
22847              n->in(2)->get_int() == BoolTest::lt ||
22848              n->in(2)->get_int() == BoolTest::gt)); // cond
22849   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22850   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22851   ins_encode %{
22852     int vlen_enc = vector_length_encoding(this, $src1);
22853     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22854     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22855     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22856   %}
22857   ins_pipe( pipe_slow );
22858 %}
22859 
22860 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22861   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22862             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22863             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22864             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22865             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22866             (n->in(2)->get_int() == BoolTest::ne ||
22867              n->in(2)->get_int() == BoolTest::le ||
22868              n->in(2)->get_int() == BoolTest::ge)); // cond
22869   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22870   effect(TEMP dst, TEMP xtmp);
22871   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22872   ins_encode %{
22873     int vlen_enc = vector_length_encoding(this, $src1);
22874     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22875     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22876     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22877   %}
22878   ins_pipe( pipe_slow );
22879 %}
22880 
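// There is no unsigned integer vector compare below AVX-512, so both operands have their sign
// bit flipped (xor with the per-type high-bit pattern) and a signed compare is used instead.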
22881 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22882   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22883             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22884             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22885             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22886             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22887   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22888   effect(TEMP dst, TEMP xtmp);
22889   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22890   ins_encode %{
22891     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22892     int vlen_enc = vector_length_encoding(this, $src1);
22893     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22894     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22895 
22896     if (vlen_enc == Assembler::AVX_128bit) {
22897       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22898     } else {
22899       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22900     }
22901     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22902     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22903     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22904   %}
22905   ins_pipe( pipe_slow );
22906 %}
22907 
22908 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22909   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22910              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22911              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22912   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22913   effect(TEMP ktmp);
22914   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22915   ins_encode %{
22916     assert(UseAVX > 2, "required");
22917 
22918     int vlen_enc = vector_length_encoding(this, $src1);
22919     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22920     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22921     KRegister mask = k0; // The comparison itself is not being masked.
22922     bool merge = false;
22923     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22924 
22925     switch (src1_elem_bt) {
22926       case T_INT: {
22927         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22928         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22929         break;
22930       }
22931       case T_LONG: {
22932         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22933         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22934         break;
22935       }
22936       default: assert(false, "%s", type2name(src1_elem_bt));
22937     }
22938   %}
22939   ins_pipe( pipe_slow );
22940 %}
22941 
22942 
22943 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22944   predicate(n->bottom_type()->isa_vectmask() &&
22945             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22946   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22947   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22948   ins_encode %{
22949     assert(UseAVX > 2, "required");
22950     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22951 
22952     int vlen_enc = vector_length_encoding(this, $src1);
22953     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22954     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22955     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22956 
    // Compare per element type; the result is a mask register.
22958     switch (src1_elem_bt) {
22959       case T_BYTE: {
22960         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22961         break;
22962       }
22963       case T_SHORT: {
22964         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22965         break;
22966       }
22967       case T_INT: {
22968         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22969         break;
22970       }
22971       case T_LONG: {
22972         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22973         break;
22974       }
22975       default: assert(false, "%s", type2name(src1_elem_bt));
22976     }
22977   %}
22978   ins_pipe( pipe_slow );
22979 %}
22980 
22981 // Extract
22982 
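// For vectors wider than 128 bits the element's 128-bit lane is first extracted into $vtmp
// (get_lane) and the scalar element is then read from that lane (get_elem).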
22983 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22984   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22985   match(Set dst (ExtractI src idx));
22986   match(Set dst (ExtractS src idx));
22987   match(Set dst (ExtractB src idx));
22988   format %{ "extractI $dst,$src,$idx\t!" %}
22989   ins_encode %{
22990     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22991 
22992     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22993     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22994   %}
22995   ins_pipe( pipe_slow );
22996 %}
22997 
22998 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22999   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
23000             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
23001   match(Set dst (ExtractI src idx));
23002   match(Set dst (ExtractS src idx));
23003   match(Set dst (ExtractB src idx));
23004   effect(TEMP vtmp);
23005   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
23006   ins_encode %{
23007     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23008 
23009     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
23010     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23011     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
23012   %}
23013   ins_pipe( pipe_slow );
23014 %}
23015 
23016 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
23017   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
23018   match(Set dst (ExtractL src idx));
23019   format %{ "extractL $dst,$src,$idx\t!" %}
23020   ins_encode %{
23021     assert(UseSSE >= 4, "required");
23022     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23023 
23024     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
23025   %}
23026   ins_pipe( pipe_slow );
23027 %}
23028 
23029 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
23030   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23031             Matcher::vector_length(n->in(1)) == 8);  // src
23032   match(Set dst (ExtractL src idx));
23033   effect(TEMP vtmp);
23034   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
23035   ins_encode %{
23036     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23037 
23038     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23039     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
23040   %}
23041   ins_pipe( pipe_slow );
23042 %}
23043 
23044 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23045   predicate(Matcher::vector_length(n->in(1)) <= 4);
23046   match(Set dst (ExtractF src idx));
23047   effect(TEMP dst, TEMP vtmp);
23048   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23049   ins_encode %{
23050     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23051 
23052     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
23053   %}
23054   ins_pipe( pipe_slow );
23055 %}
23056 
23057 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23058   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
23059             Matcher::vector_length(n->in(1)/*src*/) == 16);
23060   match(Set dst (ExtractF src idx));
23061   effect(TEMP vtmp);
23062   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23063   ins_encode %{
23064     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23065 
23066     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23067     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
23068   %}
23069   ins_pipe( pipe_slow );
23070 %}
23071 
23072 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
23073   predicate(Matcher::vector_length(n->in(1)) == 2); // src
23074   match(Set dst (ExtractD src idx));
23075   format %{ "extractD $dst,$src,$idx\t!" %}
23076   ins_encode %{
23077     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23078 
23079     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23080   %}
23081   ins_pipe( pipe_slow );
23082 %}
23083 
23084 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
23085   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23086             Matcher::vector_length(n->in(1)) == 8);  // src
23087   match(Set dst (ExtractD src idx));
23088   effect(TEMP vtmp);
23089   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
23090   ins_encode %{
23091     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23092 
23093     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23094     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
23095   %}
23096   ins_pipe( pipe_slow );
23097 %}
23098 
23099 // --------------------------------- Vector Blend --------------------------------------
23100 
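// SSE4.1 pblendvb takes its mask implicitly in xmm0, so the rule below binds the temporary to
// xmm0 and copies the mask there before blending.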
23101 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
23102   predicate(UseAVX == 0);
23103   match(Set dst (VectorBlend (Binary dst src) mask));
23104   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
23105   effect(TEMP tmp);
23106   ins_encode %{
23107     assert(UseSSE >= 4, "required");
23108 
23109     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
23110       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
23111     }
23112     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
23113   %}
23114   ins_pipe( pipe_slow );
23115 %}
23116 
23117 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
23118   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23119             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23120             Matcher::vector_length_in_bytes(n) <= 32 &&
23121             is_integral_type(Matcher::vector_element_basic_type(n)));
23122   match(Set dst (VectorBlend (Binary src1 src2) mask));
23123   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23124   ins_encode %{
23125     int vlen_enc = vector_length_encoding(this);
23126     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23127   %}
23128   ins_pipe( pipe_slow );
23129 %}
23130 
23131 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
23132   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23133             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23134             Matcher::vector_length_in_bytes(n) <= 32 &&
23135             !is_integral_type(Matcher::vector_element_basic_type(n)));
23136   match(Set dst (VectorBlend (Binary src1 src2) mask));
23137   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23138   ins_encode %{
23139     int vlen_enc = vector_length_encoding(this);
23140     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23141   %}
23142   ins_pipe( pipe_slow );
23143 %}
23144 
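// With EnableX86ECoreOpts the blend is expanded to vpandn/vpand/vpor instead of the variable
// blend instructions, which are comparatively expensive on E-core based targets.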
23145 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
23146   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
23147             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23148             Matcher::vector_length_in_bytes(n) <= 32);
23149   match(Set dst (VectorBlend (Binary src1 src2) mask));
23150   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
23151   effect(TEMP vtmp, TEMP dst);
23152   ins_encode %{
23153     int vlen_enc = vector_length_encoding(this);
23154     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
23155     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23156     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
23157   %}
23158   ins_pipe( pipe_slow );
23159 %}
23160 
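// 512-bit blend with a vector (non-predicate) mask: the mask is first converted to a k-register
// by comparing it against all-bits-set, then evpblend merges $src1 and $src2 under that mask.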
23161 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
23162   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
23163             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
23164   match(Set dst (VectorBlend (Binary src1 src2) mask));
23165   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23166   effect(TEMP ktmp);
23167   ins_encode %{
23168      int vlen_enc = Assembler::AVX_512bit;
23169      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23170     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
23171     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23172   %}
23173   ins_pipe( pipe_slow );
23174 %}
23175 
23176 
23177 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
23178   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
23179             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23180              VM_Version::supports_avx512bw()));
23181   match(Set dst (VectorBlend (Binary src1 src2) mask));
23182   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23183   ins_encode %{
23184     int vlen_enc = vector_length_encoding(this);
23185     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23186     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23187   %}
23188   ins_pipe( pipe_slow );
23189 %}
23190 
23191 // --------------------------------- ABS --------------------------------------
23192 // a = |a|
23193 instruct vabsB_reg(vec dst, vec src) %{
23194   match(Set dst (AbsVB  src));
23195   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23196   ins_encode %{
23197     uint vlen = Matcher::vector_length(this);
23198     if (vlen <= 16) {
23199       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23200     } else {
23201       int vlen_enc = vector_length_encoding(this);
23202       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23203     }
23204   %}
23205   ins_pipe( pipe_slow );
23206 %}
23207 
23208 instruct vabsS_reg(vec dst, vec src) %{
23209   match(Set dst (AbsVS  src));
23210   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23211   ins_encode %{
23212     uint vlen = Matcher::vector_length(this);
23213     if (vlen <= 8) {
23214       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23215     } else {
23216       int vlen_enc = vector_length_encoding(this);
23217       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23218     }
23219   %}
23220   ins_pipe( pipe_slow );
23221 %}
23222 
23223 instruct vabsI_reg(vec dst, vec src) %{
23224   match(Set dst (AbsVI  src));
23225   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23226   ins_encode %{
23227     uint vlen = Matcher::vector_length(this);
23228     if (vlen <= 4) {
23229       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23230     } else {
23231       int vlen_enc = vector_length_encoding(this);
23232       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23233     }
23234   %}
23235   ins_pipe( pipe_slow );
23236 %}
23237 
23238 instruct vabsL_reg(vec dst, vec src) %{
23239   match(Set dst (AbsVL  src));
23240   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23241   ins_encode %{
23242     assert(UseAVX > 2, "required");
23243     int vlen_enc = vector_length_encoding(this);
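          // evpabsq on 128/256-bit vectors requires AVX512VL; otherwise fall back to
          // the 512-bit encoding.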
23244     if (!VM_Version::supports_avx512vl()) {
23245       vlen_enc = Assembler::AVX_512bit;
23246     }
23247     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23248   %}
23249   ins_pipe( pipe_slow );
23250 %}
23251 
23252 // --------------------------------- ABSNEG --------------------------------------
23253 
23254 instruct vabsnegF(vec dst, vec src) %{
23255   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23256   match(Set dst (AbsVF src));
23257   match(Set dst (NegVF src));
23258   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23259   ins_cost(150);
23260   ins_encode %{
23261     int opcode = this->ideal_Opcode();
23262     int vlen = Matcher::vector_length(this);
23263     if (vlen == 2) {
23264       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23265     } else {
23266       assert(vlen == 8 || vlen == 16, "required");
23267       int vlen_enc = vector_length_encoding(this);
23268       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23269     }
23270   %}
23271   ins_pipe( pipe_slow );
23272 %}
23273 
23274 instruct vabsneg4F(vec dst) %{
23275   predicate(Matcher::vector_length(n) == 4);
23276   match(Set dst (AbsVF dst));
23277   match(Set dst (NegVF dst));
23278   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23279   ins_cost(150);
23280   ins_encode %{
23281     int opcode = this->ideal_Opcode();
23282     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23283   %}
23284   ins_pipe( pipe_slow );
23285 %}
23286 
23287 instruct vabsnegD(vec dst, vec src) %{
23288   match(Set dst (AbsVD  src));
23289   match(Set dst (NegVD  src));
23290   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23291   ins_encode %{
23292     int opcode = this->ideal_Opcode();
23293     uint vlen = Matcher::vector_length(this);
23294     if (vlen == 2) {
23295       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23296     } else {
23297       int vlen_enc = vector_length_encoding(this);
23298       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23299     }
23300   %}
23301   ins_pipe( pipe_slow );
23302 %}
23303 
23304 //------------------------------------- VectorTest --------------------------------------------
23305 
23306 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23307   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23308   match(Set cr (VectorTest src1 src2));
23309   effect(TEMP vtmp);
23310   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
23311   ins_encode %{
23312     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23313     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23314     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23315   %}
23316   ins_pipe( pipe_slow );
23317 %}
23318 
23319 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23320   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23321   match(Set cr (VectorTest src1 src2));
23322   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23323   ins_encode %{
23324     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23325     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23326     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23327   %}
23328   ins_pipe( pipe_slow );
23329 %}
23330 
23331 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23332   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23333              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23334             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23335   match(Set cr (VectorTest src1 src2));
23336   effect(TEMP tmp);
23337   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23338   ins_encode %{
23339     uint masklen = Matcher::vector_length(this, $src1);
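          // Move the mask bits into a GPR, keep only the masklen low bits, and compare
          // against all-ones so the flags reflect the all-true condition.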
23340     __ kmovwl($tmp$$Register, $src1$$KRegister);
23341     __ andl($tmp$$Register, (1 << masklen) - 1);
23342     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23343   %}
23344   ins_pipe( pipe_slow );
23345 %}
23346 
23347 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23348   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23349              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23350             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23351   match(Set cr (VectorTest src1 src2));
23352   effect(TEMP tmp);
23353   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23354   ins_encode %{
23355     uint masklen = Matcher::vector_length(this, $src1);
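          // The AND sets ZF only when none of the masklen low mask bits is set.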
23356     __ kmovwl($tmp$$Register, $src1$$KRegister);
23357     __ andl($tmp$$Register, (1 << masklen) - 1);
23358   %}
23359   ins_pipe( pipe_slow );
23360 %}
23361 
23362 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23363   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23364             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23365   match(Set cr (VectorTest src1 src2));
23366   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23367   ins_encode %{
23368     uint masklen = Matcher::vector_length(this, $src1);
23369     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23370   %}
23371   ins_pipe( pipe_slow );
23372 %}
23373 
23374 //------------------------------------- LoadMask --------------------------------------------
23375 
23376 instruct loadMask(legVec dst, legVec src) %{
23377   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23378   match(Set dst (VectorLoadMask src));
23379   effect(TEMP dst);
23380   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23381   ins_encode %{
23382     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23383     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23384     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23385   %}
23386   ins_pipe( pipe_slow );
23387 %}
23388 
23389 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23390   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23391   match(Set dst (VectorLoadMask src));
23392   effect(TEMP xtmp);
23393   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23394   ins_encode %{
23395     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23396                         true, Assembler::AVX_512bit);
23397   %}
23398   ins_pipe( pipe_slow );
23399 %}
23400 
23401 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23402   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23403   match(Set dst (VectorLoadMask src));
23404   effect(TEMP xtmp);
23405   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23406   ins_encode %{
23407     int vlen_enc = vector_length_encoding(in(1));
23408     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23409                         false, vlen_enc);
23410   %}
23411   ins_pipe( pipe_slow );
23412 %}
23413 
23414 //------------------------------------- StoreMask --------------------------------------------
23415 
23416 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23417   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23418   match(Set dst (VectorStoreMask src size));
23419   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23420   ins_encode %{
23421     int vlen = Matcher::vector_length(this);
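          // The mask lanes are already byte-sized 0/-1 values; pabsb/vpabsb simply
          // normalizes -1 to 1.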
23422     if (vlen <= 16 && UseAVX <= 2) {
23423       assert(UseSSE >= 3, "required");
23424       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23425     } else {
23426       assert(UseAVX > 0, "required");
23427       int src_vlen_enc = vector_length_encoding(this, $src);
23428       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23429     }
23430   %}
23431   ins_pipe( pipe_slow );
23432 %}
23433 
23434 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23435   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23436   match(Set dst (VectorStoreMask src size));
23437   effect(TEMP_DEF dst, TEMP xtmp);
23438   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23439   ins_encode %{
23440     int vlen_enc = Assembler::AVX_128bit;
23441     int vlen = Matcher::vector_length(this);
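          // Narrow each 16-bit 0/-1 mask lane to a byte and normalize -1 to 1.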
23442     if (vlen <= 8) {
23443       assert(UseSSE >= 3, "required");
23444       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23445       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23446       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23447     } else {
23448       assert(UseAVX > 0, "required");
23449       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23450       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23451       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23452     }
23453   %}
23454   ins_pipe( pipe_slow );
23455 %}
23456 
23457 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23458   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23459   match(Set dst (VectorStoreMask src size));
23460   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23461   effect(TEMP_DEF dst, TEMP xtmp);
23462   ins_encode %{
23463     int vlen_enc = Assembler::AVX_128bit;
23464     int vlen = Matcher::vector_length(this);
23465     if (vlen <= 4) {
23466       assert(UseSSE >= 3, "required");
23467       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23468       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23469       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23470       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23471     } else {
23472       assert(UseAVX > 0, "required");
23473       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23474       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23475       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23476       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23477       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23478     }
23479   %}
23480   ins_pipe( pipe_slow );
23481 %}
23482 
23483 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23484   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23485   match(Set dst (VectorStoreMask src size));
23486   effect(TEMP_DEF dst, TEMP xtmp);
23487   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23488   ins_encode %{
23489     assert(UseSSE >= 3, "required");
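          // Gather the low dwords of both long lanes, turn 0/-1 into 0/1, and narrow
          // dword -> word -> byte.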
23490     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23491     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23492     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23493     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23494     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23495   %}
23496   ins_pipe( pipe_slow );
23497 %}
23498 
23499 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23500   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23501   match(Set dst (VectorStoreMask src size));
23502   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23503   effect(TEMP_DEF dst, TEMP vtmp);
23504   ins_encode %{
23505     int vlen_enc = Assembler::AVX_128bit;
23506     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23507     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23508     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23509     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23510     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23511     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23512     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23513   %}
23514   ins_pipe( pipe_slow );
23515 %}
23516 
23517 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23518   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23519   match(Set dst (VectorStoreMask src size));
23520   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23521   ins_encode %{
23522     int src_vlen_enc = vector_length_encoding(this, $src);
23523     int dst_vlen_enc = vector_length_encoding(this);
23524     if (!VM_Version::supports_avx512vl()) {
23525       src_vlen_enc = Assembler::AVX_512bit;
23526     }
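          // Truncate each int mask lane to a byte, then normalize -1 lanes to 1 with vpabsb.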
23527     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23528     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23529   %}
23530   ins_pipe( pipe_slow );
23531 %}
23532 
23533 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23534   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23535   match(Set dst (VectorStoreMask src size));
23536   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23537   ins_encode %{
23538     int src_vlen_enc = vector_length_encoding(this, $src);
23539     int dst_vlen_enc = vector_length_encoding(this);
23540     if (!VM_Version::supports_avx512vl()) {
23541       src_vlen_enc = Assembler::AVX_512bit;
23542     }
23543     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23544     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23545   %}
23546   ins_pipe( pipe_slow );
23547 %}
23548 
23549 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23550   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23551   match(Set dst (VectorStoreMask mask size));
23552   effect(TEMP_DEF dst);
23553   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23554   ins_encode %{
23555     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23556     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23557                  false, Assembler::AVX_512bit, noreg);
23558     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23559   %}
23560   ins_pipe( pipe_slow );
23561 %}
23562 
23563 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23564   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23565   match(Set dst (VectorStoreMask mask size));
23566   effect(TEMP_DEF dst);
23567   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23568   ins_encode %{
23569     int dst_vlen_enc = vector_length_encoding(this);
23570     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23571     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23572   %}
23573   ins_pipe( pipe_slow );
23574 %}
23575 
23576 instruct vmaskcast_evex(kReg dst) %{
23577   match(Set dst (VectorMaskCast dst));
23578   ins_cost(0);
23579   format %{ "vector_mask_cast $dst" %}
23580   ins_encode %{
23581     // empty
23582   %}
23583   ins_pipe(empty);
23584 %}
23585 
23586 instruct vmaskcast(vec dst) %{
23587   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23588   match(Set dst (VectorMaskCast dst));
23589   ins_cost(0);
23590   format %{ "vector_mask_cast $dst" %}
23591   ins_encode %{
23592     // empty
23593   %}
23594   ins_pipe(empty);
23595 %}
23596 
23597 instruct vmaskcast_avx(vec dst, vec src) %{
23598   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23599   match(Set dst (VectorMaskCast src));
23600   format %{ "vector_mask_cast $dst, $src" %}
23601   ins_encode %{
23602     int vlen = Matcher::vector_length(this);
23603     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23604     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23605     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23606   %}
23607   ins_pipe(pipe_slow);
23608 %}
23609 
23610 //-------------------------------- Load Iota Indices ----------------------------------
23611 
23612 instruct loadIotaIndices(vec dst, immI_0 src) %{
23613   match(Set dst (VectorLoadConst src));
23614   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23615   ins_encode %{
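          // Load the per-element index sequence 0, 1, 2, ... from a constant table.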
23616     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23617     BasicType bt = Matcher::vector_element_basic_type(this);
23618     __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23619   %}
23620   ins_pipe( pipe_slow );
23621 %}
23622 
23623 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23624   match(Set dst (PopulateIndex src1 src2));
23625   effect(TEMP dst, TEMP vtmp);
23626   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23627   ins_encode %{
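          // dst[i] = src1 + i: broadcast the starting value and add the iota index vector.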
23628     assert($src2$$constant == 1, "required");
23629     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23630     int vlen_enc = vector_length_encoding(this);
23631     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23632     __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23633     __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23634     __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23635   %}
23636   ins_pipe( pipe_slow );
23637 %}
23638 
23639 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23640   match(Set dst (PopulateIndex src1 src2));
23641   effect(TEMP dst, TEMP vtmp);
23642   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23643   ins_encode %{
23644     assert($src2$$constant == 1, "required");
23645     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23646     int vlen_enc = vector_length_encoding(this);
23647     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23648     __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23649     __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23650     __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23651   %}
23652   ins_pipe( pipe_slow );
23653 %}
23654 
23655 //-------------------------------- Rearrange ----------------------------------
23656 
23657 // LoadShuffle/Rearrange for Byte
23658 instruct rearrangeB(vec dst, vec shuffle) %{
23659   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23660             Matcher::vector_length(n) < 32);
23661   match(Set dst (VectorRearrange dst shuffle));
23662   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23663   ins_encode %{
23664     assert(UseSSE >= 4, "required");
23665     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23666   %}
23667   ins_pipe( pipe_slow );
23668 %}
23669 
23670 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23671   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23672             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23673   match(Set dst (VectorRearrange src shuffle));
23674   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23675   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23676   ins_encode %{
23677     assert(UseAVX >= 2, "required");
23678     // Swap src into vtmp1
23679     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23680     // Shuffle swapped src to get entries from other 128 bit lane
23681     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23682     // Shuffle original src to get entries from self 128 bit lane
23683     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23684     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23685     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23686     // Perform the blend
23687     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23688   %}
23689   ins_pipe( pipe_slow );
23690 %}
23691 
23692 
23693 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23694   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23695             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23696   match(Set dst (VectorRearrange src shuffle));
23697   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23698   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23699   ins_encode %{
23700     int vlen_enc = vector_length_encoding(this);
23701     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23702                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23703                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23704   %}
23705   ins_pipe( pipe_slow );
23706 %}
23707 
23708 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23709   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23710             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23711   match(Set dst (VectorRearrange src shuffle));
23712   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23713   ins_encode %{
23714     int vlen_enc = vector_length_encoding(this);
23715     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23716   %}
23717   ins_pipe( pipe_slow );
23718 %}
23719 
23720 // LoadShuffle/Rearrange for Short
23721 
23722 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23723   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23724             !VM_Version::supports_avx512bw());
23725   match(Set dst (VectorLoadShuffle src));
23726   effect(TEMP dst, TEMP vtmp);
23727   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23728   ins_encode %{
23729     // Create a byte shuffle mask from the short shuffle mask, since only a
23730     // byte shuffle instruction is available on these platforms.
23731     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23732     if (UseAVX == 0) {
23733       assert(vlen_in_bytes <= 16, "required");
23734       // Multiply each shuffle by two to get byte index
23735       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23736       __ psllw($vtmp$$XMMRegister, 1);
23737 
23738       // Duplicate to create 2 copies of byte index
23739       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23740       __ psllw($dst$$XMMRegister, 8);
23741       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23742 
23743       // Add one to get alternate byte index
23744       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23745       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23746     } else {
23747       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23748       int vlen_enc = vector_length_encoding(this);
23749       // Multiply each shuffle by two to get byte index
23750       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23751 
23752       // Duplicate to create 2 copies of byte index
23753       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23754       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23755 
23756       // Add one to get alternate byte index
23757       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23758     }
23759   %}
23760   ins_pipe( pipe_slow );
23761 %}
23762 
23763 instruct rearrangeS(vec dst, vec shuffle) %{
23764   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23765             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23766   match(Set dst (VectorRearrange dst shuffle));
23767   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23768   ins_encode %{
23769     assert(UseSSE >= 4, "required");
23770     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23771   %}
23772   ins_pipe( pipe_slow );
23773 %}
23774 
23775 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23776   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23777             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23778   match(Set dst (VectorRearrange src shuffle));
23779   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23780   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23781   ins_encode %{
23782     assert(UseAVX >= 2, "required");
23783     // Swap src into vtmp1
23784     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23785     // Shuffle swapped src to get entries from other 128 bit lane
23786     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23787     // Shuffle original src to get entries from self 128 bit lane
23788     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23789     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23790     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23791     // Perform the blend
23792     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23793   %}
23794   ins_pipe( pipe_slow );
23795 %}
23796 
23797 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23798   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23799             VM_Version::supports_avx512bw());
23800   match(Set dst (VectorRearrange src shuffle));
23801   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23802   ins_encode %{
23803     int vlen_enc = vector_length_encoding(this);
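          // vpermw on 128/256-bit vectors requires AVX512VL; otherwise fall back to
          // the 512-bit encoding.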
23804     if (!VM_Version::supports_avx512vl()) {
23805       vlen_enc = Assembler::AVX_512bit;
23806     }
23807     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23808   %}
23809   ins_pipe( pipe_slow );
23810 %}
23811 
23812 // LoadShuffle/Rearrange for Integer and Float
23813 
23814 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23815   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23816             Matcher::vector_length(n) == 4 && UseAVX == 0);
23817   match(Set dst (VectorLoadShuffle src));
23818   effect(TEMP dst, TEMP vtmp);
23819   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23820   ins_encode %{
23821     assert(UseSSE >= 4, "required");
23822 
23823     // Create a byte shuffle mask from the int shuffle mask, since only a
23824     // byte shuffle instruction is available on these platforms.
23825 
23826     // Duplicate and multiply each shuffle by 4
23827     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23828     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23829     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23830     __ psllw($vtmp$$XMMRegister, 2);
23831 
23832     // Duplicate again to create 4 copies of byte index
23833     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23834     __ psllw($dst$$XMMRegister, 8);
23835     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23836 
23837     // Add 3,2,1,0 to get alternate byte index
23838     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23839     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23840   %}
23841   ins_pipe( pipe_slow );
23842 %}
23843 
23844 instruct rearrangeI(vec dst, vec shuffle) %{
23845   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23846             UseAVX == 0);
23847   match(Set dst (VectorRearrange dst shuffle));
23848   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23849   ins_encode %{
23850     assert(UseSSE >= 4, "required");
23851     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23852   %}
23853   ins_pipe( pipe_slow );
23854 %}
23855 
23856 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23857   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23858             UseAVX > 0);
23859   match(Set dst (VectorRearrange src shuffle));
23860   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23861   ins_encode %{
23862     int vlen_enc = vector_length_encoding(this);
23863     BasicType bt = Matcher::vector_element_basic_type(this);
23864     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23865   %}
23866   ins_pipe( pipe_slow );
23867 %}
23868 
23869 // LoadShuffle/Rearrange for Long and Double
23870 
23871 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23872   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23873             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23874   match(Set dst (VectorLoadShuffle src));
23875   effect(TEMP dst, TEMP vtmp);
23876   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23877   ins_encode %{
23878     assert(UseAVX >= 2, "required");
23879 
23880     int vlen_enc = vector_length_encoding(this);
23881     // Create a double word shuffle mask from the long shuffle mask, since only a
23882     // double word shuffle instruction is available on these platforms.
23883 
23884     // Multiply each shuffle by two to get double word index
23885     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23886 
23887     // Duplicate each double word shuffle
23888     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23889     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23890 
23891     // Add one to get alternate double word index
23892     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23893   %}
23894   ins_pipe( pipe_slow );
23895 %}
23896 
23897 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23898   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23899             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23900   match(Set dst (VectorRearrange src shuffle));
23901   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23902   ins_encode %{
23903     assert(UseAVX >= 2, "required");
23904 
23905     int vlen_enc = vector_length_encoding(this);
23906     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23907   %}
23908   ins_pipe( pipe_slow );
23909 %}
23910 
23911 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23912   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23913             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23914   match(Set dst (VectorRearrange src shuffle));
23915   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23916   ins_encode %{
23917     assert(UseAVX > 2, "required");
23918 
23919     int vlen_enc = vector_length_encoding(this);
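          // The variable-index form of vpermq has no 128-bit encoding, so promote
          // 2-element vectors to the 256-bit encoding.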
23920     if (vlen_enc == Assembler::AVX_128bit) {
23921       vlen_enc = Assembler::AVX_256bit;
23922     }
23923     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23924   %}
23925   ins_pipe( pipe_slow );
23926 %}
23927 
23928 // --------------------------------- FMA --------------------------------------
23929 // a * b + c
23930 
23931 instruct vfmaF_reg(vec a, vec b, vec c) %{
23932   match(Set c (FmaVF  c (Binary a b)));
23933   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23934   ins_cost(150);
23935   ins_encode %{
23936     assert(UseFMA, "not enabled");
23937     int vlen_enc = vector_length_encoding(this);
23938     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23939   %}
23940   ins_pipe( pipe_slow );
23941 %}
23942 
23943 instruct vfmaF_mem(vec a, memory b, vec c) %{
23944   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23945   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23946   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23947   ins_cost(150);
23948   ins_encode %{
23949     assert(UseFMA, "not enabled");
23950     int vlen_enc = vector_length_encoding(this);
23951     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23952   %}
23953   ins_pipe( pipe_slow );
23954 %}
23955 
23956 instruct vfmaD_reg(vec a, vec b, vec c) %{
23957   match(Set c (FmaVD  c (Binary a b)));
23958   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23959   ins_cost(150);
23960   ins_encode %{
23961     assert(UseFMA, "not enabled");
23962     int vlen_enc = vector_length_encoding(this);
23963     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23964   %}
23965   ins_pipe( pipe_slow );
23966 %}
23967 
23968 instruct vfmaD_mem(vec a, memory b, vec c) %{
23969   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23970   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23971   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23972   ins_cost(150);
23973   ins_encode %{
23974     assert(UseFMA, "not enabled");
23975     int vlen_enc = vector_length_encoding(this);
23976     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23977   %}
23978   ins_pipe( pipe_slow );
23979 %}
23980 
23981 // --------------------------------- Vector Multiply Add --------------------------------------
23982 
23983 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23984   predicate(UseAVX == 0);
23985   match(Set dst (MulAddVS2VI dst src1));
23986   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23987   ins_encode %{
23988     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23989   %}
23990   ins_pipe( pipe_slow );
23991 %}
23992 
23993 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23994   predicate(UseAVX > 0);
23995   match(Set dst (MulAddVS2VI src1 src2));
23996   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23997   ins_encode %{
23998     int vlen_enc = vector_length_encoding(this);
23999     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24000   %}
24001   ins_pipe( pipe_slow );
24002 %}
24003 
24004 // --------------------------------- Vector Multiply Add Add ----------------------------------
24005 
24006 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
24007   predicate(VM_Version::supports_avx512_vnni());
24008   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
24009   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
24010   ins_encode %{
24011     assert(UseAVX > 2, "required");
24012     int vlen_enc = vector_length_encoding(this);
24013     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24014   %}
24015   ins_pipe( pipe_slow );
24016   ins_cost(10);
24017 %}
24018 
24019 // --------------------------------- PopCount --------------------------------------
24020 
24021 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
24022   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24023   match(Set dst (PopCountVI src));
24024   match(Set dst (PopCountVL src));
24025   format %{ "vector_popcount_integral $dst, $src" %}
24026   ins_encode %{
24027     int opcode = this->ideal_Opcode();
24028     int vlen_enc = vector_length_encoding(this, $src);
24029     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24030     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
24031   %}
24032   ins_pipe( pipe_slow );
24033 %}
24034 
24035 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
24036   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24037   match(Set dst (PopCountVI src mask));
24038   match(Set dst (PopCountVL src mask));
24039   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
24040   ins_encode %{
24041     int vlen_enc = vector_length_encoding(this, $src);
24042     BasicType bt = Matcher::vector_element_basic_type(this, $src);
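          // Copy src first so that lanes cleared in the mask keep the original source
          // value through the merge-masked popcount.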
24043     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24044     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
24045   %}
24046   ins_pipe( pipe_slow );
24047 %}
24048 
24049 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
24050   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24051   match(Set dst (PopCountVI src));
24052   match(Set dst (PopCountVL src));
24053   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24054   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
24055   ins_encode %{
24056     int opcode = this->ideal_Opcode();
24057     int vlen_enc = vector_length_encoding(this, $src);
24058     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24059     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24060                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
24061   %}
24062   ins_pipe( pipe_slow );
24063 %}
24064 
24065 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24066 
24067 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
24068   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24069                                               Matcher::vector_length_in_bytes(n->in(1))));
24070   match(Set dst (CountTrailingZerosV src));
24071   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
24072   ins_cost(400);
24073   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
24074   ins_encode %{
24075     int vlen_enc = vector_length_encoding(this, $src);
24076     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24077     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24078                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24079   %}
24080   ins_pipe( pipe_slow );
24081 %}
24082 
24083 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24084   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24085             VM_Version::supports_avx512cd() &&
24086             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24087   match(Set dst (CountTrailingZerosV src));
24088   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24089   ins_cost(400);
24090   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24091   ins_encode %{
24092     int vlen_enc = vector_length_encoding(this, $src);
24093     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24094     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24095                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24096   %}
24097   ins_pipe( pipe_slow );
24098 %}
24099 
24100 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
24101   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24102   match(Set dst (CountTrailingZerosV src));
24103   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
24104   ins_cost(400);
24105   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
24106   ins_encode %{
24107     int vlen_enc = vector_length_encoding(this, $src);
24108     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24109     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24110                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
24111                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
24112   %}
24113   ins_pipe( pipe_slow );
24114 %}
24115 
24116 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24117   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24118   match(Set dst (CountTrailingZerosV src));
24119   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24120   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24121   ins_encode %{
24122     int vlen_enc = vector_length_encoding(this, $src);
24123     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24124     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24125                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24126   %}
24127   ins_pipe( pipe_slow );
24128 %}
24129 
24130 
24131 // --------------------------------- Bitwise Ternary Logic ----------------------------------
24132 
24133 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
24134   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
24135   effect(TEMP dst);
24136   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24137   ins_encode %{
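          // $func is the 8-bit immediate truth table selecting the ternary boolean
          // function of dst, src2 and src3.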
24138     int vector_len = vector_length_encoding(this);
24139     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
24140   %}
24141   ins_pipe( pipe_slow );
24142 %}
24143 
24144 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
24145   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
24146   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
24147   effect(TEMP dst);
24148   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24149   ins_encode %{
24150     int vector_len = vector_length_encoding(this);
24151     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
24152   %}
24153   ins_pipe( pipe_slow );
24154 %}
24155 
24156 // --------------------------------- Rotation Operations ----------------------------------
24157 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
24158   match(Set dst (RotateLeftV src shift));
24159   match(Set dst (RotateRightV src shift));
24160   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
24161   ins_encode %{
24162     int opcode      = this->ideal_Opcode();
24163     int vector_len  = vector_length_encoding(this);
24164     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24165     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
24166   %}
24167   ins_pipe( pipe_slow );
24168 %}
24169 
24170 instruct vprorate(vec dst, vec src, vec shift) %{
24171   match(Set dst (RotateLeftV src shift));
24172   match(Set dst (RotateRightV src shift));
24173   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
24174   ins_encode %{
24175     int opcode      = this->ideal_Opcode();
24176     int vector_len  = vector_length_encoding(this);
24177     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24178     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
24179   %}
24180   ins_pipe( pipe_slow );
24181 %}
24182 
24183 // ---------------------------------- Masked Operations ------------------------------------
24184 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24185   predicate(!n->in(3)->bottom_type()->isa_vectmask());
24186   match(Set dst (LoadVectorMasked mem mask));
24187   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24188   ins_encode %{
24189     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24190     int vlen_enc = vector_length_encoding(this);
24191     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24192   %}
24193   ins_pipe( pipe_slow );
24194 %}
24195 
24196 
24197 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24198   predicate(n->in(3)->bottom_type()->isa_vectmask());
24199   match(Set dst (LoadVectorMasked mem mask));
24200   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24201   ins_encode %{
24202     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24203     int vector_len = vector_length_encoding(this);
24204     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24205   %}
24206   ins_pipe( pipe_slow );
24207 %}
24208 
24209 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24210   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24211   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24212   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24213   ins_encode %{
24214     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24215     int vlen_enc = vector_length_encoding(src_node);
24216     BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24217     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24218   %}
24219   ins_pipe( pipe_slow );
24220 %}
24221 
24222 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24223   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24224   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24225   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24226   ins_encode %{
24227     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24228     BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24229     int vlen_enc = vector_length_encoding(src_node);
24230     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24231   %}
24232   ins_pipe( pipe_slow );
24233 %}
24234 
24235 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24236   match(Set addr (VerifyVectorAlignment addr mask));
24237   effect(KILL cr);
24238   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24239   ins_encode %{
24240     Label Lskip;
24241     // check if masked bits of addr are zero
24242     __ testq($addr$$Register, $mask$$constant);
24243     __ jccb(Assembler::equal, Lskip);
24244     __ stop("verify_vector_alignment found a misaligned vector memory access");
24245     __ bind(Lskip);
24246   %}
24247   ins_pipe(pipe_slow);
24248 %}
24249 
24250 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24251   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24252   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24253   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24254   ins_encode %{
24255     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24256     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24257 
24258     Label DONE;
24259     int vlen_enc = vector_length_encoding(this, $src1);
24260     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24261 
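          // dst = -1 when every lane selected by $mask compares equal; otherwise dst is
          // the index of the first lane that fails the masked comparison. kortest sets
          // CF when (~mask | eq) is all ones, i.e. no masked lane differs.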
24262     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24263     __ mov64($dst$$Register, -1L);
24264     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24265     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24266     __ jccb(Assembler::carrySet, DONE);
24267     __ kmovql($dst$$Register, $ktmp1$$KRegister);
24268     __ notq($dst$$Register);
24269     __ tzcntq($dst$$Register, $dst$$Register);
24270     __ bind(DONE);
24271   %}
24272   ins_pipe( pipe_slow );
24273 %}
24274 
24275 
24276 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24277   match(Set dst (VectorMaskGen len));
24278   effect(TEMP temp, KILL cr);
24279   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24280   ins_encode %{
24281     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24282   %}
24283   ins_pipe( pipe_slow );
24284 %}
24285 
24286 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24287   match(Set dst (VectorMaskGen len));
24288   format %{ "vector_mask_gen $len \t! vector mask generator" %}
24289   effect(TEMP temp);
24290   ins_encode %{
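          // Materialize a constant mask with the len low bits set; a zero length
          // produces an empty mask.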
24291     if ($len$$constant > 0) {
24292       __ mov64($temp$$Register, right_n_bits($len$$constant));
24293       __ kmovql($dst$$KRegister, $temp$$Register);
24294     } else {
24295       __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
24296     }
24297   %}
24298   ins_pipe( pipe_slow );
24299 %}
24300 
24301 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24302   predicate(n->in(1)->bottom_type()->isa_vectmask());
24303   match(Set dst (VectorMaskToLong mask));
24304   effect(TEMP dst, KILL cr);
24305   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24306   ins_encode %{
24307     int opcode = this->ideal_Opcode();
24308     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24309     int mask_len = Matcher::vector_length(this, $mask);
24310     int mask_size = mask_len * type2aelembytes(mbt);
24311     int vlen_enc = vector_length_encoding(this, $mask);
24312     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24313                              $dst$$Register, mask_len, mask_size, vlen_enc);
24314   %}
24315   ins_pipe( pipe_slow );
24316 %}
24317 
24318 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24319   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24320   match(Set dst (VectorMaskToLong mask));
24321   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24322   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24323   ins_encode %{
24324     int opcode = this->ideal_Opcode();
24325     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24326     int mask_len = Matcher::vector_length(this, $mask);
24327     int vlen_enc = vector_length_encoding(this, $mask);
24328     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24329                              $dst$$Register, mask_len, mbt, vlen_enc);
24330   %}
24331   ins_pipe( pipe_slow );
24332 %}
24333 
24334 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24335   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24336   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24337   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24338   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24339   ins_encode %{
24340     int opcode = this->ideal_Opcode();
24341     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24342     int mask_len = Matcher::vector_length(this, $mask);
24343     int vlen_enc = vector_length_encoding(this, $mask);
24344     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24345                              $dst$$Register, mask_len, mbt, vlen_enc);
24346   %}
24347   ins_pipe( pipe_slow );
24348 %}
24349 
24350 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24351   predicate(n->in(1)->bottom_type()->isa_vectmask());
24352   match(Set dst (VectorMaskTrueCount mask));
24353   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24354   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24355   ins_encode %{
24356     int opcode = this->ideal_Opcode();
24357     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24358     int mask_len = Matcher::vector_length(this, $mask);
24359     int mask_size = mask_len * type2aelembytes(mbt);
24360     int vlen_enc = vector_length_encoding(this, $mask);
24361     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24362                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24363   %}
24364   ins_pipe( pipe_slow );
24365 %}
24366 
24367 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24368   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24369   match(Set dst (VectorMaskTrueCount mask));
24370   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24371   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24372   ins_encode %{
24373     int opcode = this->ideal_Opcode();
24374     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24375     int mask_len = Matcher::vector_length(this, $mask);
24376     int vlen_enc = vector_length_encoding(this, $mask);
24377     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24378                              $tmp$$Register, mask_len, mbt, vlen_enc);
24379   %}
24380   ins_pipe( pipe_slow );
24381 %}
24382 
24383 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24384   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24385   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24386   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24387   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24388   ins_encode %{
24389     int opcode = this->ideal_Opcode();
24390     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24391     int mask_len = Matcher::vector_length(this, $mask);
24392     int vlen_enc = vector_length_encoding(this, $mask);
24393     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24394                              $tmp$$Register, mask_len, mbt, vlen_enc);
24395   %}
24396   ins_pipe( pipe_slow );
24397 %}
24398 
24399 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24400   predicate(n->in(1)->bottom_type()->isa_vectmask());
24401   match(Set dst (VectorMaskFirstTrue mask));
24402   match(Set dst (VectorMaskLastTrue mask));
24403   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24404   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24405   ins_encode %{
24406     int opcode = this->ideal_Opcode();
24407     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24408     int mask_len = Matcher::vector_length(this, $mask);
24409     int mask_size = mask_len * type2aelembytes(mbt);
24410     int vlen_enc = vector_length_encoding(this, $mask);
24411     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24412                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24413   %}
24414   ins_pipe( pipe_slow );
24415 %}
24416 
24417 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24418   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24419   match(Set dst (VectorMaskFirstTrue mask));
24420   match(Set dst (VectorMaskLastTrue mask));
24421   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24422   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24423   ins_encode %{
24424     int opcode = this->ideal_Opcode();
24425     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24426     int mask_len = Matcher::vector_length(this, $mask);
24427     int vlen_enc = vector_length_encoding(this, $mask);
24428     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24429                              $tmp$$Register, mask_len, mbt, vlen_enc);
24430   %}
24431   ins_pipe( pipe_slow );
24432 %}
24433 
24434 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24435   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24436   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24437   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24438   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24439   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24440   ins_encode %{
24441     int opcode = this->ideal_Opcode();
24442     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24443     int mask_len = Matcher::vector_length(this, $mask);
24444     int vlen_enc = vector_length_encoding(this, $mask);
24445     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24446                              $tmp$$Register, mask_len, mbt, vlen_enc);
24447   %}
24448   ins_pipe( pipe_slow );
24449 %}
24450 
24451 // --------------------------------- Compress/Expand Operations ---------------------------
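// These patterns implement CompressV/ExpandV (pack the lanes selected by a mask toward lane 0,
// or the inverse scatter back to masked positions) and CompressM (compressing a mask itself).
// With AVX512VL, or for 64-byte vectors, the EVEX form hands the kReg mask directly to
// vector_compress_expand(); 32-byte-or-smaller AVX2 vectors go through
// vector_compress_expand_avx2() instead, which stages a permutation in $perm and therefore
// needs the extra GPR/XMM temporaries listed in effect(). As a hedged illustration (not part of
// this file), a Vector API call such as v.compress(m) on an AVX-512 target is expected to be
// matched by vcompress_expand_reg_evex below.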
24452 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24453   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24454   match(Set dst (CompressV src mask));
24455   match(Set dst (ExpandV src mask));
24456   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24457   format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24458   ins_encode %{
24459     int opcode = this->ideal_Opcode();
24460     int vlen_enc = vector_length_encoding(this);
24461     BasicType bt  = Matcher::vector_element_basic_type(this);
24462     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24463                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24464   %}
24465   ins_pipe( pipe_slow );
24466 %}
24467 
24468 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24469   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24470   match(Set dst (CompressV src mask));
24471   match(Set dst (ExpandV src mask));
24472   format %{ "vector_compress_expand $dst, $src, $mask" %}
24473   ins_encode %{
24474     int opcode = this->ideal_Opcode();
24475     int vector_len = vector_length_encoding(this);
24476     BasicType bt  = Matcher::vector_element_basic_type(this);
24477     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24478   %}
24479   ins_pipe( pipe_slow );
24480 %}
24481 
24482 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24483   match(Set dst (CompressM mask));
24484   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24485   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24486   ins_encode %{
24487     assert(this->in(1)->bottom_type()->isa_vectmask(), "mask input expected");
24488     int mask_len = Matcher::vector_length(this);
24489     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24490   %}
24491   ins_pipe( pipe_slow );
24492 %}
24493 
24494 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
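// ReverseV reverses the bits within each lane; ReverseBytesV reverses the byte order of each
// lane. When GFNI is available, bit reversal uses a single affine transform against the
// 0x8040201008040201 matrix constant loaded below (vreverse_reg_gfni); otherwise the macro
// assembler synthesizes the reversal with the extra temporaries listed in effect(). Byte
// reversal is a shuffle, with a dedicated 64-byte fallback (vreverse_byte64_reg) for targets
// that lack AVX512BW.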
24495 
24496 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24497   predicate(!VM_Version::supports_gfni());
24498   match(Set dst (ReverseV src));
24499   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24500   format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24501   ins_encode %{
24502     int vec_enc = vector_length_encoding(this);
24503     BasicType bt = Matcher::vector_element_basic_type(this);
24504     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24505                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24506   %}
24507   ins_pipe( pipe_slow );
24508 %}
24509 
24510 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24511   predicate(VM_Version::supports_gfni());
24512   match(Set dst (ReverseV src));
24513   effect(TEMP dst, TEMP xtmp);
24514   format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24515   ins_encode %{
24516     int vec_enc = vector_length_encoding(this);
24517     BasicType bt  = Matcher::vector_element_basic_type(this);
24518     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24519     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24520                                $xtmp$$XMMRegister);
24521   %}
24522   ins_pipe( pipe_slow );
24523 %}
24524 
24525 instruct vreverse_byte_reg(vec dst, vec src) %{
24526   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24527   match(Set dst (ReverseBytesV src));
24528   effect(TEMP dst);
24529   format %{ "vector_reverse_byte $dst, $src" %}
24530   ins_encode %{
24531     int vec_enc = vector_length_encoding(this);
24532     BasicType bt = Matcher::vector_element_basic_type(this);
24533     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24534   %}
24535   ins_pipe( pipe_slow );
24536 %}
24537 
24538 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24539   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24540   match(Set dst (ReverseBytesV src));
24541   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24542   format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24543   ins_encode %{
24544     int vec_enc = vector_length_encoding(this);
24545     BasicType bt = Matcher::vector_element_basic_type(this);
24546     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24547                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24548   %}
24549   ins_pipe( pipe_slow );
24550 %}
24551 
24552 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
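// CountLeadingZerosV patterns, dispatched on element type and available ISA extensions:
// int/long lanes use the EVEX helpers (AVX512CD supplies a native lane-wise leading-zero
// count), including a merge-masked variant; short and byte lanes are synthesized on top of
// that path with extra temporaries; targets without AVX512VL and vectors under 64 bytes fall
// back to the *_avx helpers, which emulate the count without AVX-512 instructions.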
24553 
24554 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24555   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24556                                               Matcher::vector_length_in_bytes(n->in(1))));
24557   match(Set dst (CountLeadingZerosV src));
24558   format %{ "vector_count_leading_zeros $dst, $src" %}
24559   ins_encode %{
24560      int vlen_enc = vector_length_encoding(this, $src);
24561      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24562      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24563                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24564   %}
24565   ins_pipe( pipe_slow );
24566 %}
24567 
24568 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24569   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24570                                               Matcher::vector_length_in_bytes(n->in(1))));
24571   match(Set dst (CountLeadingZerosV src mask));
24572   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24573   ins_encode %{
24574     int vlen_enc = vector_length_encoding(this, $src);
24575     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24576     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24577     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24578                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24579   %}
24580   ins_pipe( pipe_slow );
24581 %}
24582 
24583 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24584   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24585             VM_Version::supports_avx512cd() &&
24586             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24587   match(Set dst (CountLeadingZerosV src));
24588   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24589   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24590   ins_encode %{
24591     int vlen_enc = vector_length_encoding(this, $src);
24592     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24593     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24594                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24595   %}
24596   ins_pipe( pipe_slow );
24597 %}
24598 
24599 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24600   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24601   match(Set dst (CountLeadingZerosV src));
24602   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24603   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24604   ins_encode %{
24605     int vlen_enc = vector_length_encoding(this, $src);
24606     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24607     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24608                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24609                                        $rtmp$$Register, true, vlen_enc);
24610   %}
24611   ins_pipe( pipe_slow );
24612 %}
24613 
24614 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24615   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24616             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24617   match(Set dst (CountLeadingZerosV src));
24618   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24619   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24620   ins_encode %{
24621     int vlen_enc = vector_length_encoding(this, $src);
24622     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24623     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24624                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24625   %}
24626   ins_pipe( pipe_slow );
24627 %}
24628 
24629 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24630   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24631             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24632   match(Set dst (CountLeadingZerosV src));
24633   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24634   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24635   ins_encode %{
24636     int vlen_enc = vector_length_encoding(this, $src);
24637     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24638     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24639                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24640   %}
24641   ins_pipe( pipe_slow );
24642 %}
24643 
24644 // ---------------------------------- Vector Masked Operations ------------------------------------
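// Predicated (masked) vector arithmetic for AVX-512 targets. Each pattern matches an ideal
// vector operation whose extra input is an opmask (kReg) and defers to evmasked_op(); the
// 'true' argument appears to request merge-masking, so lanes whose mask bit is clear keep the
// previous contents of $dst. Register and memory-operand forms are provided for most
// operations. As a hedged illustration (not part of this file), a Vector API expression such as
// av.lanewise(VectorOperators.ADD, bv, m) compiled for an AVX-512 target is expected to match
// vadd_reg_masked below.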
24645 
24646 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24647   match(Set dst (AddVB (Binary dst src2) mask));
24648   match(Set dst (AddVS (Binary dst src2) mask));
24649   match(Set dst (AddVI (Binary dst src2) mask));
24650   match(Set dst (AddVL (Binary dst src2) mask));
24651   match(Set dst (AddVF (Binary dst src2) mask));
24652   match(Set dst (AddVD (Binary dst src2) mask));
24653   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24654   ins_encode %{
24655     int vlen_enc = vector_length_encoding(this);
24656     BasicType bt = Matcher::vector_element_basic_type(this);
24657     int opc = this->ideal_Opcode();
24658     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24659                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24660   %}
24661   ins_pipe( pipe_slow );
24662 %}
24663 
24664 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24665   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24666   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24667   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24668   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24669   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24670   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24671   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24672   ins_encode %{
24673     int vlen_enc = vector_length_encoding(this);
24674     BasicType bt = Matcher::vector_element_basic_type(this);
24675     int opc = this->ideal_Opcode();
24676     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24677                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24678   %}
24679   ins_pipe( pipe_slow );
24680 %}
24681 
24682 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24683   match(Set dst (XorV (Binary dst src2) mask));
24684   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24685   ins_encode %{
24686     int vlen_enc = vector_length_encoding(this);
24687     BasicType bt = Matcher::vector_element_basic_type(this);
24688     int opc = this->ideal_Opcode();
24689     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24690                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24691   %}
24692   ins_pipe( pipe_slow );
24693 %}
24694 
24695 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24696   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24697   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24698   ins_encode %{
24699     int vlen_enc = vector_length_encoding(this);
24700     BasicType bt = Matcher::vector_element_basic_type(this);
24701     int opc = this->ideal_Opcode();
24702     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24703                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24704   %}
24705   ins_pipe( pipe_slow );
24706 %}
24707 
24708 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24709   match(Set dst (OrV (Binary dst src2) mask));
24710   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24711   ins_encode %{
24712     int vlen_enc = vector_length_encoding(this);
24713     BasicType bt = Matcher::vector_element_basic_type(this);
24714     int opc = this->ideal_Opcode();
24715     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24716                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24717   %}
24718   ins_pipe( pipe_slow );
24719 %}
24720 
24721 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24722   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24723   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24724   ins_encode %{
24725     int vlen_enc = vector_length_encoding(this);
24726     BasicType bt = Matcher::vector_element_basic_type(this);
24727     int opc = this->ideal_Opcode();
24728     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24729                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24730   %}
24731   ins_pipe( pipe_slow );
24732 %}
24733 
24734 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24735   match(Set dst (AndV (Binary dst src2) mask));
24736   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24737   ins_encode %{
24738     int vlen_enc = vector_length_encoding(this);
24739     BasicType bt = Matcher::vector_element_basic_type(this);
24740     int opc = this->ideal_Opcode();
24741     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24742                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24743   %}
24744   ins_pipe( pipe_slow );
24745 %}
24746 
24747 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24748   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24749   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24750   ins_encode %{
24751     int vlen_enc = vector_length_encoding(this);
24752     BasicType bt = Matcher::vector_element_basic_type(this);
24753     int opc = this->ideal_Opcode();
24754     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24755                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24756   %}
24757   ins_pipe( pipe_slow );
24758 %}
24759 
24760 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24761   match(Set dst (SubVB (Binary dst src2) mask));
24762   match(Set dst (SubVS (Binary dst src2) mask));
24763   match(Set dst (SubVI (Binary dst src2) mask));
24764   match(Set dst (SubVL (Binary dst src2) mask));
24765   match(Set dst (SubVF (Binary dst src2) mask));
24766   match(Set dst (SubVD (Binary dst src2) mask));
24767   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24768   ins_encode %{
24769     int vlen_enc = vector_length_encoding(this);
24770     BasicType bt = Matcher::vector_element_basic_type(this);
24771     int opc = this->ideal_Opcode();
24772     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24773                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24774   %}
24775   ins_pipe( pipe_slow );
24776 %}
24777 
24778 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24779   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24780   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24781   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24782   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24783   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24784   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24785   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24786   ins_encode %{
24787     int vlen_enc = vector_length_encoding(this);
24788     BasicType bt = Matcher::vector_element_basic_type(this);
24789     int opc = this->ideal_Opcode();
24790     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24791                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24792   %}
24793   ins_pipe( pipe_slow );
24794 %}
24795 
24796 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24797   match(Set dst (MulVS (Binary dst src2) mask));
24798   match(Set dst (MulVI (Binary dst src2) mask));
24799   match(Set dst (MulVL (Binary dst src2) mask));
24800   match(Set dst (MulVF (Binary dst src2) mask));
24801   match(Set dst (MulVD (Binary dst src2) mask));
24802   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24803   ins_encode %{
24804     int vlen_enc = vector_length_encoding(this);
24805     BasicType bt = Matcher::vector_element_basic_type(this);
24806     int opc = this->ideal_Opcode();
24807     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24808                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24809   %}
24810   ins_pipe( pipe_slow );
24811 %}
24812 
24813 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24814   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24815   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24816   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24817   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24818   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24819   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24820   ins_encode %{
24821     int vlen_enc = vector_length_encoding(this);
24822     BasicType bt = Matcher::vector_element_basic_type(this);
24823     int opc = this->ideal_Opcode();
24824     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24825                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24826   %}
24827   ins_pipe( pipe_slow );
24828 %}
24829 
24830 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24831   match(Set dst (SqrtVF dst mask));
24832   match(Set dst (SqrtVD dst mask));
24833   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24834   ins_encode %{
24835     int vlen_enc = vector_length_encoding(this);
24836     BasicType bt = Matcher::vector_element_basic_type(this);
24837     int opc = this->ideal_Opcode();
24838     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24839                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24840   %}
24841   ins_pipe( pipe_slow );
24842 %}
24843 
24844 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24845   match(Set dst (DivVF (Binary dst src2) mask));
24846   match(Set dst (DivVD (Binary dst src2) mask));
24847   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24848   ins_encode %{
24849     int vlen_enc = vector_length_encoding(this);
24850     BasicType bt = Matcher::vector_element_basic_type(this);
24851     int opc = this->ideal_Opcode();
24852     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24853                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24854   %}
24855   ins_pipe( pipe_slow );
24856 %}
24857 
24858 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24859   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24860   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24861   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24862   ins_encode %{
24863     int vlen_enc = vector_length_encoding(this);
24864     BasicType bt = Matcher::vector_element_basic_type(this);
24865     int opc = this->ideal_Opcode();
24866     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24867                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24868   %}
24869   ins_pipe( pipe_slow );
24870 %}
24871 
24872 
24873 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24874   match(Set dst (RotateLeftV (Binary dst shift) mask));
24875   match(Set dst (RotateRightV (Binary dst shift) mask));
24876   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24877   ins_encode %{
24878     int vlen_enc = vector_length_encoding(this);
24879     BasicType bt = Matcher::vector_element_basic_type(this);
24880     int opc = this->ideal_Opcode();
24881     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24882                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24883   %}
24884   ins_pipe( pipe_slow );
24885 %}
24886 
24887 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24888   match(Set dst (RotateLeftV (Binary dst src2) mask));
24889   match(Set dst (RotateRightV (Binary dst src2) mask));
24890   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24891   ins_encode %{
24892     int vlen_enc = vector_length_encoding(this);
24893     BasicType bt = Matcher::vector_element_basic_type(this);
24894     int opc = this->ideal_Opcode();
24895     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24896                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24897   %}
24898   ins_pipe( pipe_slow );
24899 %}
24900 
24901 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24902   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24903   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24904   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24905   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24906   ins_encode %{
24907     int vlen_enc = vector_length_encoding(this);
24908     BasicType bt = Matcher::vector_element_basic_type(this);
24909     int opc = this->ideal_Opcode();
24910     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24911                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24912   %}
24913   ins_pipe( pipe_slow );
24914 %}
24915 
24916 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24917   predicate(!n->as_ShiftV()->is_var_shift());
24918   match(Set dst (LShiftVS (Binary dst src2) mask));
24919   match(Set dst (LShiftVI (Binary dst src2) mask));
24920   match(Set dst (LShiftVL (Binary dst src2) mask));
24921   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24922   ins_encode %{
24923     int vlen_enc = vector_length_encoding(this);
24924     BasicType bt = Matcher::vector_element_basic_type(this);
24925     int opc = this->ideal_Opcode();
24926     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24927                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24928   %}
24929   ins_pipe( pipe_slow );
24930 %}
24931 
24932 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24933   predicate(n->as_ShiftV()->is_var_shift());
24934   match(Set dst (LShiftVS (Binary dst src2) mask));
24935   match(Set dst (LShiftVI (Binary dst src2) mask));
24936   match(Set dst (LShiftVL (Binary dst src2) mask));
24937   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24938   ins_encode %{
24939     int vlen_enc = vector_length_encoding(this);
24940     BasicType bt = Matcher::vector_element_basic_type(this);
24941     int opc = this->ideal_Opcode();
24942     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24943                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24944   %}
24945   ins_pipe( pipe_slow );
24946 %}
24947 
24948 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24949   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24950   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24951   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24952   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24953   ins_encode %{
24954     int vlen_enc = vector_length_encoding(this);
24955     BasicType bt = Matcher::vector_element_basic_type(this);
24956     int opc = this->ideal_Opcode();
24957     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24958                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24959   %}
24960   ins_pipe( pipe_slow );
24961 %}
24962 
24963 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24964   predicate(!n->as_ShiftV()->is_var_shift());
24965   match(Set dst (RShiftVS (Binary dst src2) mask));
24966   match(Set dst (RShiftVI (Binary dst src2) mask));
24967   match(Set dst (RShiftVL (Binary dst src2) mask));
24968   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24969   ins_encode %{
24970     int vlen_enc = vector_length_encoding(this);
24971     BasicType bt = Matcher::vector_element_basic_type(this);
24972     int opc = this->ideal_Opcode();
24973     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24974                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24975   %}
24976   ins_pipe( pipe_slow );
24977 %}
24978 
24979 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24980   predicate(n->as_ShiftV()->is_var_shift());
24981   match(Set dst (RShiftVS (Binary dst src2) mask));
24982   match(Set dst (RShiftVI (Binary dst src2) mask));
24983   match(Set dst (RShiftVL (Binary dst src2) mask));
24984   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24985   ins_encode %{
24986     int vlen_enc = vector_length_encoding(this);
24987     BasicType bt = Matcher::vector_element_basic_type(this);
24988     int opc = this->ideal_Opcode();
24989     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24990                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24991   %}
24992   ins_pipe( pipe_slow );
24993 %}
24994 
24995 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24996   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24997   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24998   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24999   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
25000   ins_encode %{
25001     int vlen_enc = vector_length_encoding(this);
25002     BasicType bt = Matcher::vector_element_basic_type(this);
25003     int opc = this->ideal_Opcode();
25004     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25005                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
25006   %}
25007   ins_pipe( pipe_slow );
25008 %}
25009 
25010 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
25011   predicate(!n->as_ShiftV()->is_var_shift());
25012   match(Set dst (URShiftVS (Binary dst src2) mask));
25013   match(Set dst (URShiftVI (Binary dst src2) mask));
25014   match(Set dst (URShiftVL (Binary dst src2) mask));
25015   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25016   ins_encode %{
25017     int vlen_enc = vector_length_encoding(this);
25018     BasicType bt = Matcher::vector_element_basic_type(this);
25019     int opc = this->ideal_Opcode();
25020     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25021                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
25022   %}
25023   ins_pipe( pipe_slow );
25024 %}
25025 
25026 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
25027   predicate(n->as_ShiftV()->is_var_shift());
25028   match(Set dst (URShiftVS (Binary dst src2) mask));
25029   match(Set dst (URShiftVI (Binary dst src2) mask));
25030   match(Set dst (URShiftVL (Binary dst src2) mask));
25031   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25032   ins_encode %{
25033     int vlen_enc = vector_length_encoding(this);
25034     BasicType bt = Matcher::vector_element_basic_type(this);
25035     int opc = this->ideal_Opcode();
25036     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25037                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
25038   %}
25039   ins_pipe( pipe_slow );
25040 %}
25041 
25042 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
25043   match(Set dst (MaxV (Binary dst src2) mask));
25044   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25045   ins_encode %{
25046     int vlen_enc = vector_length_encoding(this);
25047     BasicType bt = Matcher::vector_element_basic_type(this);
25048     int opc = this->ideal_Opcode();
25049     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25050                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25051   %}
25052   ins_pipe( pipe_slow );
25053 %}
25054 
25055 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
25056   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
25057   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25058   ins_encode %{
25059     int vlen_enc = vector_length_encoding(this);
25060     BasicType bt = Matcher::vector_element_basic_type(this);
25061     int opc = this->ideal_Opcode();
25062     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25063                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25064   %}
25065   ins_pipe( pipe_slow );
25066 %}
25067 
25068 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
25069   match(Set dst (MinV (Binary dst src2) mask));
25070   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25071   ins_encode %{
25072     int vlen_enc = vector_length_encoding(this);
25073     BasicType bt = Matcher::vector_element_basic_type(this);
25074     int opc = this->ideal_Opcode();
25075     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25076                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25077   %}
25078   ins_pipe( pipe_slow );
25079 %}
25080 
25081 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
25082   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
25083   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25084   ins_encode %{
25085     int vlen_enc = vector_length_encoding(this);
25086     BasicType bt = Matcher::vector_element_basic_type(this);
25087     int opc = this->ideal_Opcode();
25088     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25089                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25090   %}
25091   ins_pipe( pipe_slow );
25092 %}
25093 
25094 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
25095   match(Set dst (VectorRearrange (Binary dst src2) mask));
25096   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
25097   ins_encode %{
25098     int vlen_enc = vector_length_encoding(this);
25099     BasicType bt = Matcher::vector_element_basic_type(this);
25100     int opc = this->ideal_Opcode();
25101     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25102                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25103   %}
25104   ins_pipe( pipe_slow );
25105 %}
25106 
25107 instruct vabs_masked(vec dst, kReg mask) %{
25108   match(Set dst (AbsVB dst mask));
25109   match(Set dst (AbsVS dst mask));
25110   match(Set dst (AbsVI dst mask));
25111   match(Set dst (AbsVL dst mask));
25112   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
25113   ins_encode %{
25114     int vlen_enc = vector_length_encoding(this);
25115     BasicType bt = Matcher::vector_element_basic_type(this);
25116     int opc = this->ideal_Opcode();
25117     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25118                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
25119   %}
25120   ins_pipe( pipe_slow );
25121 %}
25122 
25123 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
25124   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
25125   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
25126   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25127   ins_encode %{
25128     assert(UseFMA, "Needs FMA instruction support.");
25129     int vlen_enc = vector_length_encoding(this);
25130     BasicType bt = Matcher::vector_element_basic_type(this);
25131     int opc = this->ideal_Opcode();
25132     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25133                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
25134   %}
25135   ins_pipe( pipe_slow );
25136 %}
25137 
25138 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
25139   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
25140   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
25141   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25142   ins_encode %{
25143     assert(UseFMA, "Needs FMA instruction support.");
25144     int vlen_enc = vector_length_encoding(this);
25145     BasicType bt = Matcher::vector_element_basic_type(this);
25146     int opc = this->ideal_Opcode();
25147     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25148                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
25149   %}
25150   ins_pipe( pipe_slow );
25151 %}
25152 
25153 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
25154   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
25155   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
25156   ins_encode %{
25157     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
25158     int vlen_enc = vector_length_encoding(this, $src1);
25159     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
25160 
25161     // Dispatch on the element type of src1 and emit the matching EVEX compare.
25162     switch (src1_elem_bt) {
25163       case T_BYTE: {
25164         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25165         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25166         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25167         break;
25168       }
25169       case T_SHORT: {
25170         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25171         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25172         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25173         break;
25174       }
25175       case T_INT: {
25176         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25177         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25178         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25179         break;
25180       }
25181       case T_LONG: {
25182         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25183         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25184         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25185         break;
25186       }
25187       case T_FLOAT: {
25188         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25189         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25190         break;
25191       }
25192       case T_DOUBLE: {
25193         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25194         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25195         break;
25196       }
25197       default: assert(false, "%s", type2name(src1_elem_bt)); break;
25198     }
25199   %}
25200   ins_pipe( pipe_slow );
25201 %}
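
// Mask materialization and whole-mask logical operations: MaskAll broadcasts a scalar condition
// into every mask lane, XorVMask with a MaskAll(-1) operand is matched as a mask-not (knot),
// VectorLongToMask turns the low bits of a long into a mask (a kmov for EVEX opmasks, a vector
// expansion otherwise), and AndVMask/OrVMask/XorVMask combine whole masks.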
25202 
25203 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25204   predicate(Matcher::vector_length(n) <= 32);
25205   match(Set dst (MaskAll src));
25206   format %{ "mask_all_evexI_LE32 $dst, $src" %}
25207   ins_encode %{
25208     int mask_len = Matcher::vector_length(this);
25209     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25210   %}
25211   ins_pipe( pipe_slow );
25212 %}
25213 
25214 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25215   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25216   match(Set dst (XorVMask src (MaskAll cnt)));
25217   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25218   format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
25219   ins_encode %{
25220     uint masklen = Matcher::vector_length(this);
25221     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25222   %}
25223   ins_pipe( pipe_slow );
25224 %}
25225 
25226 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25227   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25228             (Matcher::vector_length(n) == 16) ||
25229             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25230   match(Set dst (XorVMask src (MaskAll cnt)));
25231   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25232   ins_encode %{
25233     uint masklen = Matcher::vector_length(this);
25234     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25235   %}
25236   ins_pipe( pipe_slow );
25237 %}
25238 
25239 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
25240   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25241   match(Set dst (VectorLongToMask src));
25242   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
25243   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
25244   ins_encode %{
25245     int mask_len = Matcher::vector_length(this);
25246     int vec_enc  = vector_length_encoding(mask_len);
25247     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25248                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25249   %}
25250   ins_pipe( pipe_slow );
25251 %}
25252 
25253 
25254 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25255   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25256   match(Set dst (VectorLongToMask src));
25257   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
25258   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
25259   ins_encode %{
25260     int mask_len = Matcher::vector_length(this);
25261     assert(mask_len <= 32, "invalid mask length");
25262     int vec_enc  = vector_length_encoding(mask_len);
25263     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25264                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25265   %}
25266   ins_pipe( pipe_slow );
25267 %}
25268 
25269 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25270   predicate(n->bottom_type()->isa_vectmask());
25271   match(Set dst (VectorLongToMask src));
25272   format %{ "long_to_mask_evex $dst, $src" %}
25273   ins_encode %{
25274     __ kmov($dst$$KRegister, $src$$Register);
25275   %}
25276   ins_pipe( pipe_slow );
25277 %}
25278 
25279 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25280   match(Set dst (AndVMask src1 src2));
25281   match(Set dst (OrVMask src1 src2));
25282   match(Set dst (XorVMask src1 src2));
25283   effect(TEMP kscratch);
25284   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25285   ins_encode %{
25286     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25287     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25288     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25289     uint masklen = Matcher::vector_length(this);
25290     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25291     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25292   %}
25293   ins_pipe( pipe_slow );
25294 %}
25295 
25296 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25297   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25298   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25299   ins_encode %{
25300     int vlen_enc = vector_length_encoding(this);
25301     BasicType bt = Matcher::vector_element_basic_type(this);
25302     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25303                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25304   %}
25305   ins_pipe( pipe_slow );
25306 %}
25307 
25308 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25309   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25310   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25311   ins_encode %{
25312     int vlen_enc = vector_length_encoding(this);
25313     BasicType bt = Matcher::vector_element_basic_type(this);
25314     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25315                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25316   %}
25317   ins_pipe( pipe_slow );
25318 %}
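
// The cast patterns below (opmask, vector and legacy-vector register classes) are zero-size
// no-ops: CastVV only adjusts the ideal type of the value and emits no code.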
25319 
25320 instruct castMM(kReg dst)
25321 %{
25322   match(Set dst (CastVV dst));
25323 
25324   size(0);
25325   format %{ "# castVV of $dst" %}
25326   ins_encode(/* empty encoding */);
25327   ins_cost(0);
25328   ins_pipe(empty);
25329 %}
25330 
25331 instruct castVV(vec dst)
25332 %{
25333   match(Set dst (CastVV dst));
25334 
25335   size(0);
25336   format %{ "# castVV of $dst" %}
25337   ins_encode(/* empty encoding */);
25338   ins_cost(0);
25339   ins_pipe(empty);
25340 %}
25341 
25342 instruct castVVLeg(legVec dst)
25343 %{
25344   match(Set dst (CastVV dst));
25345 
25346   size(0);
25347   format %{ "# castVV of $dst" %}
25348   ins_encode(/* empty encoding */);
25349   ins_cost(0);
25350   ins_pipe(empty);
25351 %}
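
// IsInfiniteF/IsInfiniteD are answered with a VFPCLASS test using immediate 0x18, which appears
// to select the +Infinity and -Infinity classes; the resulting mask bit is then copied to a GPR.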
25352 
25353 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25354 %{
25355   match(Set dst (IsInfiniteF src));
25356   effect(TEMP ktmp, KILL cr);
25357   format %{ "float_class_check $dst, $src" %}
25358   ins_encode %{
25359     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25360     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25361   %}
25362   ins_pipe(pipe_slow);
25363 %}
25364 
25365 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25366 %{
25367   match(Set dst (IsInfiniteD src));
25368   effect(TEMP ktmp, KILL cr);
25369   format %{ "double_class_check $dst, $src" %}
25370   ins_encode %{
25371     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25372     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25373   %}
25374   ins_pipe(pipe_slow);
25375 %}
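
// Saturating vector add/subtract (SaturatingAddV/SaturatingSubV). Byte and short lanes map
// directly onto the hardware's saturating instructions via vector_saturating_op(), in signed and
// unsigned flavors with register, memory and masked variants. Int and long lanes have no native
// saturating forms, so the *_evex and *_avx patterns below emulate them in the macro assembler,
// which is why they need the temporary registers listed in effect().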
25376 
25377 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25378 %{
25379   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25380             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25381   match(Set dst (SaturatingAddV src1 src2));
25382   match(Set dst (SaturatingSubV src1 src2));
25383   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25384   ins_encode %{
25385     int vlen_enc = vector_length_encoding(this);
25386     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25387     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25388                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25389   %}
25390   ins_pipe(pipe_slow);
25391 %}
25392 
25393 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25394 %{
25395   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25396             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25397   match(Set dst (SaturatingAddV src1 src2));
25398   match(Set dst (SaturatingSubV src1 src2));
25399   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25400   ins_encode %{
25401     int vlen_enc = vector_length_encoding(this);
25402     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25403     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25404                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25405   %}
25406   ins_pipe(pipe_slow);
25407 %}
25408 
25409 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25410 %{
25411   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25412             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25413             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25414   match(Set dst (SaturatingAddV src1 src2));
25415   match(Set dst (SaturatingSubV src1 src2));
25416   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25417   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25418   ins_encode %{
25419     int vlen_enc = vector_length_encoding(this);
25420     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25421     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25422                                         $src1$$XMMRegister, $src2$$XMMRegister,
25423                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25424                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25425   %}
25426   ins_pipe(pipe_slow);
25427 %}
25428 
25429 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25430 %{
25431   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25432             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25433             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25434   match(Set dst (SaturatingAddV src1 src2));
25435   match(Set dst (SaturatingSubV src1 src2));
25436   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25437   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25438   ins_encode %{
25439     int vlen_enc = vector_length_encoding(this);
25440     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25441     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25442                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25443                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25444   %}
25445   ins_pipe(pipe_slow);
25446 %}
25447 
25448 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25449 %{
25450   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25451             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25452             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25453   match(Set dst (SaturatingAddV src1 src2));
25454   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25455   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25456   ins_encode %{
25457     int vlen_enc = vector_length_encoding(this);
25458     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25459     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25460                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25461   %}
25462   ins_pipe(pipe_slow);
25463 %}
25464 
25465 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25466 %{
25467   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25468             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25469             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25470   match(Set dst (SaturatingAddV src1 src2));
25471   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25472   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25473   ins_encode %{
25474     int vlen_enc = vector_length_encoding(this);
25475     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25476     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25477                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25478   %}
25479   ins_pipe(pipe_slow);
25480 %}
25481 
25482 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25483 %{
25484   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25485             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25486             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25487   match(Set dst (SaturatingSubV src1 src2));
25488   effect(TEMP ktmp);
25489   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25490   ins_encode %{
25491     int vlen_enc = vector_length_encoding(this);
25492     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25493     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25494                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25495   %}
25496   ins_pipe(pipe_slow);
25497 %}
25498 
25499 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25500 %{
25501   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25502             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25503             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25504   match(Set dst (SaturatingSubV src1 src2));
25505   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25506   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25507   ins_encode %{
25508     int vlen_enc = vector_length_encoding(this);
25509     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25510     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25511                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25512   %}
25513   ins_pipe(pipe_slow);
25514 %}
25515 
25516 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25517 %{
25518   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25519             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25520   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25521   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25522   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25523   ins_encode %{
25524     int vlen_enc = vector_length_encoding(this);
25525     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25526     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25527                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25528   %}
25529   ins_pipe(pipe_slow);
25530 %}
25531 
25532 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25533 %{
25534   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25535             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25536   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25537   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25538   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25539   ins_encode %{
25540     int vlen_enc = vector_length_encoding(this);
25541     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25542     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25543                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25544   %}
25545   ins_pipe(pipe_slow);
25546 %}
25547 
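// Masked (predicated) saturating add/sub for subword types: only the lanes
// selected by the mask are updated; the remaining lanes keep the value
// already in dst.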
25548 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25549   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25550             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25551   match(Set dst (SaturatingAddV (Binary dst src) mask));
25552   match(Set dst (SaturatingSubV (Binary dst src) mask));
25553   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25554   ins_encode %{
25555     int vlen_enc = vector_length_encoding(this);
25556     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25557     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25558                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25559   %}
25560   ins_pipe( pipe_slow );
25561 %}
25562 
25563 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25564   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25565             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25566   match(Set dst (SaturatingAddV (Binary dst src) mask));
25567   match(Set dst (SaturatingSubV (Binary dst src) mask));
25568   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25569   ins_encode %{
25570     int vlen_enc = vector_length_encoding(this);
25571     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25572     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25573                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25574   %}
25575   ins_pipe( pipe_slow );
25576 %}
25577 
25578 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25579   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25580             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25581   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25582   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25583   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25584   ins_encode %{
25585     int vlen_enc = vector_length_encoding(this);
25586     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25587     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25588                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25589   %}
25590   ins_pipe( pipe_slow );
25591 %}
25592 
25593 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25594   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25595             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25596   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25597   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25598   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25599   ins_encode %{
25600     int vlen_enc = vector_length_encoding(this);
25601     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25602     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25603                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25604   %}
25605   ins_pipe( pipe_slow );
25606 %}
25607 
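// SelectFromTwoVector: each destination lane is selected from the concatenation
// of src1 and src2 according to the per-lane indices in index (a two-table
// permute); note that index also serves as the destination register.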
25608 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25609 %{
25610   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25611   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25612   ins_encode %{
25613     int vlen_enc = vector_length_encoding(this);
25614     BasicType bt = Matcher::vector_element_basic_type(this);
25615     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25616   %}
25617   ins_pipe(pipe_slow);
25618 %}
25619 
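// Half-precision (Float16) scalar and vector patterns. reinterpretS2HF/HF2S
// move the raw 16-bit value between a general-purpose register and an XMM
// register; the arithmetic patterns below are backed by AVX512-FP16
// instructions (vsqrtsh, vfmadd132sh/ph, the packed FP16 binary ops), with
// AVX10.2 min/max forms selected by predicate where supported.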
25620 instruct reinterpretS2HF(regF dst, rRegI src)
25621 %{
25622   match(Set dst (ReinterpretS2HF src));
25623   format %{ "evmovw $dst, $src" %}
25624   ins_encode %{
25625     __ evmovw($dst$$XMMRegister, $src$$Register);
25626   %}
25627   ins_pipe(pipe_slow);
25628 %}
25629 
25630 instruct reinterpretHF2S(rRegI dst, regF src)
25631 %{
25632   match(Set dst (ReinterpretHF2S src));
25633   format %{ "evmovw $dst, $src" %}
25634   ins_encode %{
25635     __ evmovw($dst$$Register, $src$$XMMRegister);
25636   %}
25637   ins_pipe(pipe_slow);
25638 %}
25639 
25640 instruct convF2HFAndS2HF(regF dst, regF src)
25641 %{
25642   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25643   format %{ "convF2HFAndS2HF $dst, $src" %}
25644   ins_encode %{
25645     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25646   %}
25647   ins_pipe(pipe_slow);
25648 %}
25649 
25650 instruct convHF2SAndHF2F(regF dst, regF src)
25651 %{
25652   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25653   format %{ "convHF2SAndHF2F $dst, $src" %}
25654   ins_encode %{
25655     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25656   %}
25657   ins_pipe(pipe_slow);
25658 %}
25659 
25660 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25661 %{
25662   match(Set dst (SqrtHF src));
25663   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25664   ins_encode %{
25665     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25666   %}
25667   ins_pipe(pipe_slow);
25668 %}
25669 
25670 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25671 %{
25672   match(Set dst (AddHF src1 src2));
25673   match(Set dst (DivHF src1 src2));
25674   match(Set dst (MulHF src1 src2));
25675   match(Set dst (SubHF src1 src2));
25676   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25677   ins_encode %{
25678     int opcode = this->ideal_Opcode();
25679     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25680   %}
25681   ins_pipe(pipe_slow);
25682 %}
25683 
25684 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25685 %{
25686   predicate(VM_Version::supports_avx10_2());
25687   match(Set dst (MaxHF src1 src2));
25688   match(Set dst (MinHF src1 src2));
25689 
25690   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25691   ins_encode %{
25692     int opcode = this->ideal_Opcode();
25693     __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25694   %}
25695   ins_pipe( pipe_slow );
25696 %}
25697 
25698 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25699 %{
25700   predicate(!VM_Version::supports_avx10_2());
25701   match(Set dst (MaxHF src1 src2));
25702   match(Set dst (MinHF src1 src2));
25703   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25704 
25705   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25706   ins_encode %{
25707     int opcode = this->ideal_Opcode();
25708     __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25709                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25710   %}
25711   ins_pipe( pipe_slow );
25712 %}
25713 
25714 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25715 %{
25716   match(Set dst (FmaHF  src2 (Binary dst src1)));
25717   effect(DEF dst);
25718   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25719   ins_encode %{
25720     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25721   %}
25722   ins_pipe( pipe_slow );
25723 %}
25724 
25725 
25726 instruct vector_sqrt_HF_reg(vec dst, vec src)
25727 %{
25728   match(Set dst (SqrtVHF src));
25729   format %{ "vector_sqrt_fp16 $dst, $src" %}
25730   ins_encode %{
25731     int vlen_enc = vector_length_encoding(this);
25732     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25733   %}
25734   ins_pipe(pipe_slow);
25735 %}
25736 
25737 instruct vector_sqrt_HF_mem(vec dst, memory src)
25738 %{
25739   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25740   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25741   ins_encode %{
25742     int vlen_enc = vector_length_encoding(this);
25743     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25744   %}
25745   ins_pipe(pipe_slow);
25746 %}
25747 
25748 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25749 %{
25750   match(Set dst (AddVHF src1 src2));
25751   match(Set dst (DivVHF src1 src2));
25752   match(Set dst (MulVHF src1 src2));
25753   match(Set dst (SubVHF src1 src2));
25754   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25755   ins_encode %{
25756     int vlen_enc = vector_length_encoding(this);
25757     int opcode = this->ideal_Opcode();
25758     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25759   %}
25760   ins_pipe(pipe_slow);
25761 %}
25762 
25763 
25764 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25765 %{
25766   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25767   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25768   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25769   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25770   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25771   ins_encode %{
25772     int vlen_enc = vector_length_encoding(this);
25773     int opcode = this->ideal_Opcode();
25774     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25775   %}
25776   ins_pipe(pipe_slow);
25777 %}
25778 
25779 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25780 %{
25781   match(Set dst (FmaVHF src2 (Binary dst src1)));
25782   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25783   ins_encode %{
25784     int vlen_enc = vector_length_encoding(this);
25785     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25786   %}
25787   ins_pipe( pipe_slow );
25788 %}
25789 
25790 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25791 %{
25792   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25793   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25794   ins_encode %{
25795     int vlen_enc = vector_length_encoding(this);
25796     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25797   %}
25798   ins_pipe( pipe_slow );
25799 %}
25800 
25801 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25802 %{
25803   predicate(VM_Version::supports_avx10_2());
25804   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25805   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25806   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25807   ins_encode %{
25808     int vlen_enc = vector_length_encoding(this);
25809     int opcode = this->ideal_Opcode();
25810     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25811                             k0, vlen_enc);
25812   %}
25813   ins_pipe( pipe_slow );
25814 %}
25815 
25816 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25817 %{
25818   predicate(VM_Version::supports_avx10_2());
25819   match(Set dst (MinVHF src1 src2));
25820   match(Set dst (MaxVHF src1 src2));
25821   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25822   ins_encode %{
25823     int vlen_enc = vector_length_encoding(this);
25824     int opcode = this->ideal_Opcode();
25825     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25826                             k0, vlen_enc);
25827   %}
25828   ins_pipe( pipe_slow );
25829 %}
25830 
25831 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25832 %{
25833   predicate(!VM_Version::supports_avx10_2());
25834   match(Set dst (MinVHF src1 src2));
25835   match(Set dst (MaxVHF src1 src2));
25836   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25837   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25838   ins_encode %{
25839     int vlen_enc = vector_length_encoding(this);
25840     int opcode = this->ideal_Opcode();
25841     __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25842                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25843   %}
25844   ins_pipe( pipe_slow );
25845 %}
25846 
25847 //----------PEEPHOLE RULES-----------------------------------------------------
25848 // These must follow all instruction definitions as they use the names
25849 // defined in the instructions definitions.
25850 //
25851 // peeppredicate ( rule_predicate );
// // the rule is ignored unless this predicate evaluates to true
25853 //
25854 // peepmatch ( root_instr_name [preceding_instruction]* );
25855 //
25856 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file. The method has the
// // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...),
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, functions that when invoked return a new node
// // as defined in peepreplace, and the rule numbers of the nodes appearing in the
// // corresponding peepmatch. The procedure returns true if the transformation
// // succeeded, else false.
25865 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
// // instruction numbers are zero-based, using left-to-right order in peepmatch
25870 //
25871 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25872 // // provide an instruction_number.operand_name for each operand that appears
25873 // // in the replacement instruction's match rule
25874 //
25875 // ---------VM FLAGS---------------------------------------------------------
25876 //
25877 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25878 //
25879 // Each peephole rule is given an identifying number starting with zero and
25880 // increasing by one in the order seen by the parser.  An individual peephole
25881 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25882 // on the command-line.
25883 //
25884 // ---------CURRENT LIMITATIONS----------------------------------------------
25885 //
// Only transformations inside a basic block (do we need more for peephole?)
25887 //
25888 // ---------EXAMPLE----------------------------------------------------------
25889 //
25890 // // pertinent parts of existing instructions in architecture description
25891 // instruct movI(rRegI dst, rRegI src)
25892 // %{
25893 //   match(Set dst (CopyI src));
25894 // %}
25895 //
25896 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25897 // %{
25898 //   match(Set dst (AddI dst src));
25899 //   effect(KILL cr);
25900 // %}
25901 //
25902 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25903 // %{
25904 //   match(Set dst (AddI dst src));
25905 // %}
25906 //
25907 // 1. Simple replacement
25908 // - Only match adjacent instructions in same basic block
25909 // - Only equality constraints
25910 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25911 // - Only one replacement instruction
25912 //
25913 // // Change (inc mov) to lea
25914 // peephole %{
25915 //   // lea should only be emitted when beneficial
25916 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25917 //   // increment preceded by register-register move
25918 //   peepmatch ( incI_rReg movI );
25919 //   // require that the destination register of the increment
25920 //   // match the destination register of the move
25921 //   peepconstraint ( 0.dst == 1.dst );
25922 //   // construct a replacement instruction that sets
25923 //   // the destination to ( move's source register + one )
25924 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25925 // %}
25926 //
25927 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25929 // - More flexible constraints
25930 // - More flexible transformations
25931 // - May utilise architecture-dependent API more effectively
25932 // - Currently only one replacement instruction due to adlc parsing capabilities
25933 //
25934 // // Change (inc mov) to lea
25935 // peephole %{
25936 //   // lea should only be emitted when beneficial
25937 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the function below
25939 //   peepmatch ( incI_rReg movI );
25940 //   // the method that takes the responsibility of transformation
25941 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node when
//   // invoked is passed into the function above
25944 //   peepreplace ( leaI_rReg_immI() );
25945 // %}
25946 
// These instructions are not matched by the matcher but are used by the peephole rules below
25948 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25949 %{
25950   predicate(false);
25951   match(Set dst (AddI src1 src2));
25952   format %{ "leal    $dst, [$src1 + $src2]" %}
25953   ins_encode %{
25954     Register dst = $dst$$Register;
25955     Register src1 = $src1$$Register;
25956     Register src2 = $src2$$Register;
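    // rbp and r13 cannot be encoded as a base register without an extra
    // displacement byte, so prefer the other operand as the base when possible.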
25957     if (src1 != rbp && src1 != r13) {
25958       __ leal(dst, Address(src1, src2, Address::times_1));
25959     } else {
25960       assert(src2 != rbp && src2 != r13, "");
25961       __ leal(dst, Address(src2, src1, Address::times_1));
25962     }
25963   %}
25964   ins_pipe(ialu_reg_reg);
25965 %}
25966 
25967 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25968 %{
25969   predicate(false);
25970   match(Set dst (AddI src1 src2));
25971   format %{ "leal    $dst, [$src1 + $src2]" %}
25972   ins_encode %{
25973     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25974   %}
25975   ins_pipe(ialu_reg_reg);
25976 %}
25977 
25978 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25979 %{
25980   predicate(false);
25981   match(Set dst (LShiftI src shift));
25982   format %{ "leal    $dst, [$src << $shift]" %}
25983   ins_encode %{
25984     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25985     Register src = $src$$Register;
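    // A shift by 1 (times_2) is emitted as [src + src]: the index-only form in
    // the else branch always requires a 32-bit displacement, so the base+index
    // form is shorter (unless src is rbp/r13, which would need an extra
    // displacement byte as the base).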
25986     if (scale == Address::times_2 && src != rbp && src != r13) {
25987       __ leal($dst$$Register, Address(src, src, Address::times_1));
25988     } else {
25989       __ leal($dst$$Register, Address(noreg, src, scale));
25990     }
25991   %}
25992   ins_pipe(ialu_reg_reg);
25993 %}
25994 
25995 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25996 %{
25997   predicate(false);
25998   match(Set dst (AddL src1 src2));
25999   format %{ "leaq    $dst, [$src1 + $src2]" %}
26000   ins_encode %{
26001     Register dst = $dst$$Register;
26002     Register src1 = $src1$$Register;
26003     Register src2 = $src2$$Register;
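    // Same base-register encoding consideration as in leaI_rReg_rReg_peep above.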
26004     if (src1 != rbp && src1 != r13) {
26005       __ leaq(dst, Address(src1, src2, Address::times_1));
26006     } else {
26007       assert(src2 != rbp && src2 != r13, "");
26008       __ leaq(dst, Address(src2, src1, Address::times_1));
26009     }
26010   %}
26011   ins_pipe(ialu_reg_reg);
26012 %}
26013 
26014 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
26015 %{
26016   predicate(false);
26017   match(Set dst (AddL src1 src2));
26018   format %{ "leaq    $dst, [$src1 + $src2]" %}
26019   ins_encode %{
26020     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
26021   %}
26022   ins_pipe(ialu_reg_reg);
26023 %}
26024 
26025 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
26026 %{
26027   predicate(false);
26028   match(Set dst (LShiftL src shift));
26029   format %{ "leaq    $dst, [$src << $shift]" %}
26030   ins_encode %{
26031     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
26032     Register src = $src$$Register;
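    // Same shift-by-1 and base-register considerations as in leaI_rReg_immI2_peep above.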
26033     if (scale == Address::times_2 && src != rbp && src != r13) {
26034       __ leaq($dst$$Register, Address(src, src, Address::times_1));
26035     } else {
26036       __ leaq($dst$$Register, Address(noreg, src, scale));
26037     }
26038   %}
26039   ins_pipe(ialu_reg_reg);
26040 %}
26041 
26042 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
26043 // sal}) with lea instructions. The {add, sal} rules are beneficial in
26044 // processors with at least partial ALU support for lea
26045 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
26046 // beneficial for processors with full ALU support
26047 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
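//
// For illustration only (register choices arbitrary), the reg-reg rule turns a
// sequence such as
//   movl  ecx, eax
//   addl  ecx, ebx
// into a single
//   leal  ecx, [rax + rbx]
// This is only valid when the condition flags written by the add are not
// consumed afterwards, since lea does not modify flags.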
26048 
26049 peephole
26050 %{
26051   peeppredicate(VM_Version::supports_fast_2op_lea());
26052   peepmatch (addI_rReg);
26053   peepprocedure (lea_coalesce_reg);
26054   peepreplace (leaI_rReg_rReg_peep());
26055 %}
26056 
26057 peephole
26058 %{
26059   peeppredicate(VM_Version::supports_fast_2op_lea());
26060   peepmatch (addI_rReg_imm);
26061   peepprocedure (lea_coalesce_imm);
26062   peepreplace (leaI_rReg_immI_peep());
26063 %}
26064 
26065 peephole
26066 %{
26067   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26068                 VM_Version::is_intel_cascade_lake());
26069   peepmatch (incI_rReg);
26070   peepprocedure (lea_coalesce_imm);
26071   peepreplace (leaI_rReg_immI_peep());
26072 %}
26073 
26074 peephole
26075 %{
26076   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26077                 VM_Version::is_intel_cascade_lake());
26078   peepmatch (decI_rReg);
26079   peepprocedure (lea_coalesce_imm);
26080   peepreplace (leaI_rReg_immI_peep());
26081 %}
26082 
26083 peephole
26084 %{
26085   peeppredicate(VM_Version::supports_fast_2op_lea());
26086   peepmatch (salI_rReg_immI2);
26087   peepprocedure (lea_coalesce_imm);
26088   peepreplace (leaI_rReg_immI2_peep());
26089 %}
26090 
26091 peephole
26092 %{
26093   peeppredicate(VM_Version::supports_fast_2op_lea());
26094   peepmatch (addL_rReg);
26095   peepprocedure (lea_coalesce_reg);
26096   peepreplace (leaL_rReg_rReg_peep());
26097 %}
26098 
26099 peephole
26100 %{
26101   peeppredicate(VM_Version::supports_fast_2op_lea());
26102   peepmatch (addL_rReg_imm);
26103   peepprocedure (lea_coalesce_imm);
26104   peepreplace (leaL_rReg_immL32_peep());
26105 %}
26106 
26107 peephole
26108 %{
26109   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26110                 VM_Version::is_intel_cascade_lake());
26111   peepmatch (incL_rReg);
26112   peepprocedure (lea_coalesce_imm);
26113   peepreplace (leaL_rReg_immL32_peep());
26114 %}
26115 
26116 peephole
26117 %{
26118   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26119                 VM_Version::is_intel_cascade_lake());
26120   peepmatch (decL_rReg);
26121   peepprocedure (lea_coalesce_imm);
26122   peepreplace (leaL_rReg_immL32_peep());
26123 %}
26124 
26125 peephole
26126 %{
26127   peeppredicate(VM_Version::supports_fast_2op_lea());
26128   peepmatch (salL_rReg_immI2);
26129   peepprocedure (lea_coalesce_imm);
26130   peepreplace (leaL_rReg_immI2_peep());
26131 %}
26132 
26133 peephole
26134 %{
26135   peepmatch (leaPCompressedOopOffset);
26136   peepprocedure (lea_remove_redundant);
26137 %}
26138 
26139 peephole
26140 %{
26141   peepmatch (leaP8Narrow);
26142   peepprocedure (lea_remove_redundant);
26143 %}
26144 
26145 peephole
26146 %{
26147   peepmatch (leaP32Narrow);
26148   peepprocedure (lea_remove_redundant);
26149 %}
26150 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use
// flags that are already set by the previous instruction.
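//
// For example (illustrative only), in a sequence such as
//   andl  eax, ebx
//   testl eax, eax
//   je    L
// the and sets the flags exactly as the test would, so the test can be removed
// when its consumers rely only on those flags.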
26153 
// int variant
26155 peephole
26156 %{
26157   peepmatch (testI_reg);
26158   peepprocedure (test_may_remove);
26159 %}
26160 
// long variant
26162 peephole
26163 %{
26164   peepmatch (testL_reg);
26165   peepprocedure (test_may_remove);
26166 %}
26167 
26168 
26169 //----------SMARTSPILL RULES---------------------------------------------------
26170 // These must follow all instruction definitions as they use the names
26171 // defined in the instructions definitions.