//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
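//
// For example, the definition of RAX further below,
//
//   reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
//
// reads as: RAX is save-on-call both in compiled Java code and in the C
// calling convention, it is spilled and reloaded as an int (Op_RegI), its
// hardware encoding (the bit-pattern placed into the opcodes) is 0, and it
// is backed by the VMReg returned by rax->as_VMReg().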

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
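// For instance, the low byte of RDI is only addressable as DIL when a REX
// prefix is present; without REX the same encoding selects the legacy BH
// register.  Likewise, any use of R8-R15 requires the REX.R/X/B extension
// bits.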

// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);
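
// Following the heuristic above, the caller-saved scratch registers R10 and
// R11 (not used for argument passing in either ABI) get the highest
// allocation priority, while RSP, which is permanently reserved as the stack
// pointer, comes last.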

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
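//
// Each 32-bit word of an XMM/YMM/ZMM register gets its own VMReg slot below:
// for example, XMM0 covers xmm0->as_VMReg() through xmm0->as_VMReg()->next(15).
// A scalar Float lives in word (a), a Double in words (a,b), a 256-bit vector
// in words (a)-(h), and a 512-bit vector in all of words (a)-(p).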

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
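
// Note that k0 is not defined here: in EVEX encodings a mask-field value of
// zero means "no masking", so k0 cannot be allocated as a general predicate
// register.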


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
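
// The classes below are defined with a body instead of a fixed register list:
// each body returns a RegMask (e.g. _ANY_REG_mask) that is computed when the
// VM starts up, so registers whose availability is only known at runtime,
// such as the APX registers R16-R31, can be included or excluded as needed.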

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
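
// A reg_class_dynamic selects between two statically defined register classes
// using a runtime predicate: the first class (here the EVEX variant covering
// XMM0-XMM31) is used when the %{ ... %} condition is true, otherwise the
// second (legacy, XMM0-XMM15) class is used. The same pattern repeats for the
// double and vector classes below.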
 1115 
 1116 // Class for pre evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for all 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
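  // r16-r31 are the extended GPRs introduced by Intel APX; they are stripped
  // from the allocatable masks below whenever UseAPX is disabled.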
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
  // _ALL_REG_mask is generated by adlc from the all_reg register class above.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
  // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask are generated by adlc
  // from the float_reg_legacy/float_reg_evex register classes above.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() &&
         (C->max_vector_size() > 16 || C->clear_upper_avx());  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 
 1663 //
 1664 // Compute padding required for nodes which need alignment
 1665 //
 1666 
 1667 // The address of the call instruction needs to be 4-byte aligned to
 1668 // ensure that it does not span a cache line so that it can be patched.
 1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1670 {
 1671   current_offset += clear_avx_size(); // skip vzeroupper
 1672   current_offset += 1; // skip call opcode byte
 1673   return align_up(current_offset, alignment_required()) - current_offset;
 1674 }
 1675 
 1676 // The address of the call instruction needs to be 4-byte aligned to
 1677 // ensure that it does not span a cache line so that it can be patched.
 1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1679 {
 1680   current_offset += clear_avx_size(); // skip vzeroupper
 1681   current_offset += 11; // skip movq instruction + call opcode byte
 1682   return align_up(current_offset, alignment_required()) - current_offset;
 1683 }
 1684 
 1685 // This could be in MacroAssembler but it's fairly C2 specific
 1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1687   Label exit;
 1688   __ jccb(Assembler::noParity, exit);
 1689   __ pushf();
 1690   //
 1691   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1692   // zero OF,AF,SF for NaN values.
 1693   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1694   // values returns 'less than' result (CF is set).
 1695   // Leave the rest of flags unchanged.
 1696   //
 1697   //    7 6 5 4 3 2 1 0
 1698   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1699   //    0 0 1 0 1 0 1 1   (0x2B)
 1700   //
 1701   __ andq(Address(rsp, 0), 0xffffff2b);
 1702   __ popf();
 1703   __ bind(exit);
 1704 }
 1705 
 1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // A floating point compare sets CF=1 both for 'below' and for the unordered
  // case (at least one input is NaN), so NaN inputs take the same branch as
  // a < b. Result in dst: -1 if below or unordered, 0 if equal, 1 if above.
 1709   Label done;
 1710   __ movl(dst, -1);
 1711   __ jcc(Assembler::below, done);
 1712   __ setcc(Assembler::notEqual, dst);
 1713   __ bind(done);
 1714 }
 1715 
 1716 enum FP_PREC {
 1717   fp_prec_hlf,
 1718   fp_prec_flt,
 1719   fp_prec_dbl
 1720 };
 1721 
 1722 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1723                                 XMMRegister p, XMMRegister q) {
 1724   if (pt == fp_prec_hlf) {
 1725     __ evucomish(p, q);
 1726   } else if (pt == fp_prec_flt) {
 1727     __ ucomiss(p, q);
 1728   } else {
 1729     __ ucomisd(p, q);
 1730   }
 1731 }
 1732 
 1733 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1734                          XMMRegister dst, XMMRegister src, Register scratch) {
 1735   if (pt == fp_prec_hlf) {
 1736     __ movhlf(dst, src, scratch);
 1737   } else if (pt == fp_prec_flt) {
 1738     __ movflt(dst, src);
 1739   } else {
 1740     __ movdbl(dst, src);
 1741   }
 1742 }
 1743 
 1744 // Math.min()          # Math.max()
 1745 // -----------------------------
 1746 // (v)ucomis[h/s/d]    #
 1747 // ja   -> b           # a
 1748 // jp   -> NaN         # NaN
 1749 // jb   -> a           # b
 1750 // je                  #
 1751 // |-jz -> a | b       # a & b
 1752 // |    -> a           #
 1753 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1754                             XMMRegister a, XMMRegister b,
 1755                             XMMRegister xmmt, Register rt,
 1756                             bool min, enum FP_PREC pt) {
 1757 
 1758   Label nan, zero, below, above, done;
 1759 
 1760   emit_fp_ucom(masm, pt, a, b);
 1761 
 1762   if (dst->encoding() != (min ? b : a)->encoding()) {
 1763     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1764   } else {
 1765     __ jccb(Assembler::above, done);
 1766   }
 1767 
 1768   __ jccb(Assembler::parity, nan);  // PF=1
 1769   __ jccb(Assembler::below, below); // CF=1
 1770 
 1771   // equal
 1772   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1773   emit_fp_ucom(masm, pt, a, xmmt);
 1774 
 1775   __ jccb(Assembler::equal, zero);
 1776   movfp(masm, pt, dst, a, rt);
 1777 
 1778   __ jmp(done);
 1779 
 1780   __ bind(zero);
 1781   if (min) {
 1782     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1783   } else {
 1784     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1785   }
 1786 
 1787   __ jmp(done);
 1788 
 1789   __ bind(above);
 1790   movfp(masm, pt, dst, min ? b : a, rt);
 1791 
 1792   __ jmp(done);
 1793 
 1794   __ bind(nan);
 1795   if (pt == fp_prec_hlf) {
 1796     __ movl(rt, 0x00007e00); // Float16.NaN
 1797     __ evmovw(dst, rt);
 1798   } else if (pt == fp_prec_flt) {
 1799     __ movl(rt, 0x7fc00000); // Float.NaN
 1800     __ movdl(dst, rt);
 1801   } else {
 1802     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1803     __ movdq(dst, rt);
 1804   }
 1805   __ jmp(done);
 1806 
 1807   __ bind(below);
 1808   movfp(masm, pt, dst, min ? a : b, rt);
 1809 
 1810   __ bind(done);
 1811 }
 1812 
 1813 //=============================================================================
 1814 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1815 
 1816 int ConstantTable::calculate_table_base_offset() const {
 1817   return 0;  // absolute addressing, no offset
 1818 }
 1819 
 1820 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1821 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1822   ShouldNotReachHere();
 1823 }
 1824 
 1825 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1826   // Empty encoding
 1827 }
 1828 
 1829 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1830   return 0;
 1831 }
 1832 
 1833 #ifndef PRODUCT
 1834 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1835   st->print("# MachConstantBaseNode (empty encoding)");
 1836 }
 1837 #endif
 1838 
 1839 
 1840 //=============================================================================
 1841 #ifndef PRODUCT
 1842 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1843   Compile* C = ra_->C;
 1844 
 1845   int framesize = C->output()->frame_size_in_bytes();
 1846   int bangsize = C->output()->bang_size_in_bytes();
 1847   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1848   // Remove wordSize for return addr which is already pushed.
 1849   framesize -= wordSize;
 1850 
 1851   if (C->output()->need_stack_bang(bangsize)) {
 1852     framesize -= wordSize;
 1853     st->print("# stack bang (%d bytes)", bangsize);
 1854     st->print("\n\t");
 1855     st->print("pushq   rbp\t# Save rbp");
 1856     if (PreserveFramePointer) {
 1857         st->print("\n\t");
 1858         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1859     }
 1860     if (framesize) {
 1861       st->print("\n\t");
 1862       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1863     }
 1864   } else {
 1865     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1866     st->print("\n\t");
 1867     framesize -= wordSize;
 1868     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1869     if (PreserveFramePointer) {
 1870       st->print("\n\t");
 1871       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1872       if (framesize > 0) {
 1873         st->print("\n\t");
 1874         st->print("addq    rbp, #%d", framesize);
 1875       }
 1876     }
 1877   }
 1878 
 1879   if (VerifyStackAtCalls) {
 1880     st->print("\n\t");
 1881     framesize -= wordSize;
 1882     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1883 #ifdef ASSERT
 1884     st->print("\n\t");
 1885     st->print("# stack alignment check");
 1886 #endif
 1887   }
 1888   if (C->stub_function() != nullptr) {
 1889     st->print("\n\t");
 1890     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1891     st->print("\n\t");
 1892     st->print("je      fast_entry\t");
 1893     st->print("\n\t");
 1894     st->print("call    #nmethod_entry_barrier_stub\t");
 1895     st->print("\n\tfast_entry:");
 1896   }
 1897   st->cr();
 1898 }
 1899 #endif
 1900 
 1901 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1902   Compile* C = ra_->C;
 1903 
 1904   __ verified_entry(C);
 1905 
 1906   if (ra_->C->stub_function() == nullptr) {
 1907     __ entry_barrier();
 1908   }
 1909 
 1910   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1911     __ bind(*_verified_entry);
 1912   }
 1913 
 1914   C->output()->set_frame_complete(__ offset());
 1915 
 1916   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table might be emitted before MachConstantBaseNode itself.
 1919     ConstantTable& constant_table = C->output()->constant_table();
 1920     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1921   }
 1922 }
 1923 
 1924 
 1925 int MachPrologNode::reloc() const
 1926 {
 1927   return 0; // a large enough number
 1928 }
 1929 
 1930 //=============================================================================
 1931 #ifndef PRODUCT
 1932 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1933 {
 1934   Compile* C = ra_->C;
 1935   if (generate_vzeroupper(C)) {
 1936     st->print("vzeroupper");
 1937     st->cr(); st->print("\t");
 1938   }
 1939 
 1940   int framesize = C->output()->frame_size_in_bytes();
 1941   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1942   // Remove word for return adr already pushed
 1943   // and RBP
 1944   framesize -= 2*wordSize;
 1945 
 1946   if (framesize) {
 1947     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1948     st->print("\t");
 1949   }
 1950 
 1951   st->print_cr("popq    rbp");
 1952   if (do_polling() && C->is_method_compilation()) {
 1953     st->print("\t");
 1954     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1955                  "ja      #safepoint_stub\t"
 1956                  "# Safepoint: poll for GC");
 1957   }
 1958 }
 1959 #endif
 1960 
 1961 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1962 {
 1963   Compile* C = ra_->C;
 1964 
 1965   if (generate_vzeroupper(C)) {
 1966     // Clear upper bits of YMM registers when current compiled code uses
 1967     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1968     __ vzeroupper();
 1969   }
 1970 
 1971   // Subtract two words to account for return address and rbp
 1972   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1973   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1974 
 1975   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1976     __ reserved_stack_check();
 1977   }
 1978 
 1979   if (do_polling() && C->is_method_compilation()) {
 1980     Label dummy_label;
 1981     Label* code_stub = &dummy_label;
 1982     if (!C->output()->in_scratch_emit_size()) {
 1983       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1984       C->output()->add_stub(stub);
 1985       code_stub = &stub->entry();
 1986     }
 1987     __ relocate(relocInfo::poll_return_type);
 1988     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1989   }
 1990 }
 1991 
 1992 int MachEpilogNode::reloc() const
 1993 {
 1994   return 2; // a large enough number
 1995 }
 1996 
 1997 const Pipeline* MachEpilogNode::pipeline() const
 1998 {
 1999   return MachNode::pipeline_class();
 2000 }
 2001 
 2002 //=============================================================================
 2003 
 2004 enum RC {
 2005   rc_bad,
 2006   rc_int,
 2007   rc_kreg,
 2008   rc_float,
 2009   rc_stack
 2010 };
 2011 
 2012 static enum RC rc_class(OptoReg::Name reg)
 2013 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2015 
 2016   if (OptoReg::is_stack(reg)) return rc_stack;
 2017 
 2018   VMReg r = OptoReg::as_VMReg(reg);
 2019 
 2020   if (r->is_Register()) return rc_int;
 2021 
 2022   if (r->is_KRegister()) return rc_kreg;
 2023 
 2024   assert(r->is_XMMRegister(), "must be");
 2025   return rc_float;
 2026 }
 2027 
 2028 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2029 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2030                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2031 
 2032 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2033                      int stack_offset, int reg, uint ireg, outputStream* st);
 2034 
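// Stack-to-stack vector copies need a temporary: rax (for 32-bit lanes) or
// xmm0 (for 256/512-bit copies) is saved to a scratch slot just below rsp,
// used to shuttle the data, and then restored; 64/128-bit copies go through
// pushq/popq instead.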
 2035 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2036                                       int dst_offset, uint ireg, outputStream* st) {
 2037   if (masm) {
 2038     switch (ireg) {
 2039     case Op_VecS:
 2040       __ movq(Address(rsp, -8), rax);
 2041       __ movl(rax, Address(rsp, src_offset));
 2042       __ movl(Address(rsp, dst_offset), rax);
 2043       __ movq(rax, Address(rsp, -8));
 2044       break;
 2045     case Op_VecD:
 2046       __ pushq(Address(rsp, src_offset));
 2047       __ popq (Address(rsp, dst_offset));
 2048       break;
 2049     case Op_VecX:
 2050       __ pushq(Address(rsp, src_offset));
 2051       __ popq (Address(rsp, dst_offset));
 2052       __ pushq(Address(rsp, src_offset+8));
 2053       __ popq (Address(rsp, dst_offset+8));
 2054       break;
 2055     case Op_VecY:
 2056       __ vmovdqu(Address(rsp, -32), xmm0);
 2057       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2058       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2059       __ vmovdqu(xmm0, Address(rsp, -32));
 2060       break;
 2061     case Op_VecZ:
 2062       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2063       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2064       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2065       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2066       break;
 2067     default:
 2068       ShouldNotReachHere();
 2069     }
 2070 #ifndef PRODUCT
 2071   } else {
 2072     switch (ireg) {
 2073     case Op_VecS:
 2074       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2075                 "movl    rax, [rsp + #%d]\n\t"
 2076                 "movl    [rsp + #%d], rax\n\t"
 2077                 "movq    rax, [rsp - #8]",
 2078                 src_offset, dst_offset);
 2079       break;
 2080     case Op_VecD:
 2081       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2082                 "popq    [rsp + #%d]",
 2083                 src_offset, dst_offset);
 2084       break;
 2085      case Op_VecX:
 2086       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2087                 "popq    [rsp + #%d]\n\t"
 2088                 "pushq   [rsp + #%d]\n\t"
 2089                 "popq    [rsp + #%d]",
 2090                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2091       break;
 2092     case Op_VecY:
 2093       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2094                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2095                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2096                 "vmovdqu xmm0, [rsp - #32]",
 2097                 src_offset, dst_offset);
 2098       break;
 2099     case Op_VecZ:
 2100       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2101                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2102                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2103                 "vmovdqu xmm0, [rsp - #64]",
 2104                 src_offset, dst_offset);
 2105       break;
 2106     default:
 2107       ShouldNotReachHere();
 2108     }
 2109 #endif
 2110   }
 2111 }
 2112 
 2113 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2114                                        PhaseRegAlloc* ra_,
 2115                                        bool do_size,
 2116                                        outputStream* st) const {
 2117   assert(masm != nullptr || st  != nullptr, "sanity");
 2118   // Get registers to move
 2119   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2120   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2121   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2122   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2123 
 2124   enum RC src_second_rc = rc_class(src_second);
 2125   enum RC src_first_rc = rc_class(src_first);
 2126   enum RC dst_second_rc = rc_class(dst_second);
 2127   enum RC dst_first_rc = rc_class(dst_first);
 2128 
 2129   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2130          "must move at least 1 register" );
 2131 
 2132   if (src_first == dst_first && src_second == dst_second) {
 2133     // Self copy, no move
 2134     return 0;
 2135   }
 2136   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2137     uint ireg = ideal_reg();
 2138     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2139     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2141       // mem -> mem
 2142       int src_offset = ra_->reg2offset(src_first);
 2143       int dst_offset = ra_->reg2offset(dst_first);
 2144       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2146       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2148       int stack_offset = ra_->reg2offset(dst_first);
 2149       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2151       int stack_offset = ra_->reg2offset(src_first);
 2152       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2153     } else {
 2154       ShouldNotReachHere();
 2155     }
 2156     return 0;
 2157   }
 2158   if (src_first_rc == rc_stack) {
 2159     // mem ->
 2160     if (dst_first_rc == rc_stack) {
 2161       // mem -> mem
 2162       assert(src_second != dst_first, "overlap");
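            // An even first slot whose pair occupies the next slot denotes a single
            // 64-bit value; anything else is handled as a 32-bit move.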
 2163       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2164           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2165         // 64-bit
 2166         int src_offset = ra_->reg2offset(src_first);
 2167         int dst_offset = ra_->reg2offset(dst_first);
 2168         if (masm) {
 2169           __ pushq(Address(rsp, src_offset));
 2170           __ popq (Address(rsp, dst_offset));
 2171 #ifndef PRODUCT
 2172         } else {
 2173           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2174                     "popq    [rsp + #%d]",
 2175                      src_offset, dst_offset);
 2176 #endif
 2177         }
 2178       } else {
 2179         // 32-bit
 2180         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2181         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2182         // There is no 32-bit push/pop in 64-bit mode, so use rax as scratch:
 2183         int src_offset = ra_->reg2offset(src_first);
 2184         int dst_offset = ra_->reg2offset(dst_first);
 2185         if (masm) {
 2186           __ movq(Address(rsp, -8), rax);
 2187           __ movl(rax, Address(rsp, src_offset));
 2188           __ movl(Address(rsp, dst_offset), rax);
 2189           __ movq(rax, Address(rsp, -8));
 2190 #ifndef PRODUCT
 2191         } else {
 2192           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2193                     "movl    rax, [rsp + #%d]\n\t"
 2194                     "movl    [rsp + #%d], rax\n\t"
 2195                     "movq    rax, [rsp - #8]",
 2196                      src_offset, dst_offset);
 2197 #endif
 2198         }
 2199       }
 2200       return 0;
 2201     } else if (dst_first_rc == rc_int) {
 2202       // mem -> gpr
 2203       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2204           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2205         // 64-bit
 2206         int offset = ra_->reg2offset(src_first);
 2207         if (masm) {
 2208           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2209 #ifndef PRODUCT
 2210         } else {
 2211           st->print("movq    %s, [rsp + #%d]\t# spill",
 2212                      Matcher::regName[dst_first],
 2213                      offset);
 2214 #endif
 2215         }
 2216       } else {
 2217         // 32-bit
 2218         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2219         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2220         int offset = ra_->reg2offset(src_first);
 2221         if (masm) {
 2222           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2223 #ifndef PRODUCT
 2224         } else {
 2225           st->print("movl    %s, [rsp + #%d]\t# spill",
 2226                      Matcher::regName[dst_first],
 2227                      offset);
 2228 #endif
 2229         }
 2230       }
 2231       return 0;
 2232     } else if (dst_first_rc == rc_float) {
 2233       // mem -> xmm
 2234       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2235           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2236         // 64-bit
 2237         int offset = ra_->reg2offset(src_first);
 2238         if (masm) {
 2239           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2240 #ifndef PRODUCT
 2241         } else {
 2242           st->print("%s  %s, [rsp + #%d]\t# spill",
 2243                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2244                      Matcher::regName[dst_first],
 2245                      offset);
 2246 #endif
 2247         }
 2248       } else {
 2249         // 32-bit
 2250         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2251         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2252         int offset = ra_->reg2offset(src_first);
 2253         if (masm) {
 2254           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2255 #ifndef PRODUCT
 2256         } else {
 2257           st->print("movss   %s, [rsp + #%d]\t# spill",
 2258                      Matcher::regName[dst_first],
 2259                      offset);
 2260 #endif
 2261         }
 2262       }
 2263       return 0;
 2264     } else if (dst_first_rc == rc_kreg) {
 2265       // mem -> kreg
 2266       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2267           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2268         // 64-bit
 2269         int offset = ra_->reg2offset(src_first);
 2270         if (masm) {
 2271           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2272 #ifndef PRODUCT
 2273         } else {
 2274           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2275                      Matcher::regName[dst_first],
 2276                      offset);
 2277 #endif
 2278         }
 2279       }
 2280       return 0;
 2281     }
 2282   } else if (src_first_rc == rc_int) {
 2283     // gpr ->
 2284     if (dst_first_rc == rc_stack) {
 2285       // gpr -> mem
 2286       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2287           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2288         // 64-bit
 2289         int offset = ra_->reg2offset(dst_first);
 2290         if (masm) {
 2291           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2292 #ifndef PRODUCT
 2293         } else {
 2294           st->print("movq    [rsp + #%d], %s\t# spill",
 2295                      offset,
 2296                      Matcher::regName[src_first]);
 2297 #endif
 2298         }
 2299       } else {
 2300         // 32-bit
 2301         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2302         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2303         int offset = ra_->reg2offset(dst_first);
 2304         if (masm) {
 2305           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2306 #ifndef PRODUCT
 2307         } else {
 2308           st->print("movl    [rsp + #%d], %s\t# spill",
 2309                      offset,
 2310                      Matcher::regName[src_first]);
 2311 #endif
 2312         }
 2313       }
 2314       return 0;
 2315     } else if (dst_first_rc == rc_int) {
 2316       // gpr -> gpr
 2317       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2318           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2319         // 64-bit
 2320         if (masm) {
 2321           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2322                   as_Register(Matcher::_regEncode[src_first]));
 2323 #ifndef PRODUCT
 2324         } else {
 2325           st->print("movq    %s, %s\t# spill",
 2326                      Matcher::regName[dst_first],
 2327                      Matcher::regName[src_first]);
 2328 #endif
 2329         }
 2330         return 0;
 2331       } else {
 2332         // 32-bit
 2333         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2334         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2335         if (masm) {
 2336           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2337                   as_Register(Matcher::_regEncode[src_first]));
 2338 #ifndef PRODUCT
 2339         } else {
 2340           st->print("movl    %s, %s\t# spill",
 2341                      Matcher::regName[dst_first],
 2342                      Matcher::regName[src_first]);
 2343 #endif
 2344         }
 2345         return 0;
 2346       }
 2347     } else if (dst_first_rc == rc_float) {
 2348       // gpr -> xmm
 2349       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2350           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2351         // 64-bit
 2352         if (masm) {
 2353           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2354 #ifndef PRODUCT
 2355         } else {
 2356           st->print("movdq   %s, %s\t# spill",
 2357                      Matcher::regName[dst_first],
 2358                      Matcher::regName[src_first]);
 2359 #endif
 2360         }
 2361       } else {
 2362         // 32-bit
 2363         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2364         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2365         if (masm) {
 2366           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2367 #ifndef PRODUCT
 2368         } else {
 2369           st->print("movdl   %s, %s\t# spill",
 2370                      Matcher::regName[dst_first],
 2371                      Matcher::regName[src_first]);
 2372 #endif
 2373         }
 2374       }
 2375       return 0;
 2376     } else if (dst_first_rc == rc_kreg) {
 2377       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2378           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2379         // 64-bit
 2380         if (masm) {
 2381           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2382 #ifndef PRODUCT
 2383         } else {
 2384           st->print("kmovq   %s, %s\t# spill",
 2385                      Matcher::regName[dst_first],
 2386                      Matcher::regName[src_first]);
 2387 #endif
 2388         }
 2389       }
 2390       Unimplemented();
 2391       return 0;
 2392     }
 2393   } else if (src_first_rc == rc_float) {
 2394     // xmm ->
 2395     if (dst_first_rc == rc_stack) {
 2396       // xmm -> mem
 2397       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2398           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2399         // 64-bit
 2400         int offset = ra_->reg2offset(dst_first);
 2401         if (masm) {
 2402           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2403 #ifndef PRODUCT
 2404         } else {
 2405           st->print("movsd   [rsp + #%d], %s\t# spill",
 2406                      offset,
 2407                      Matcher::regName[src_first]);
 2408 #endif
 2409         }
 2410       } else {
 2411         // 32-bit
 2412         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2413         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2414         int offset = ra_->reg2offset(dst_first);
 2415         if (masm) {
 2416           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2417 #ifndef PRODUCT
 2418         } else {
 2419           st->print("movss   [rsp + #%d], %s\t# spill",
 2420                      offset,
 2421                      Matcher::regName[src_first]);
 2422 #endif
 2423         }
 2424       }
 2425       return 0;
 2426     } else if (dst_first_rc == rc_int) {
 2427       // xmm -> gpr
 2428       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2429           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2430         // 64-bit
 2431         if (masm) {
 2432           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2433 #ifndef PRODUCT
 2434         } else {
 2435           st->print("movdq   %s, %s\t# spill",
 2436                      Matcher::regName[dst_first],
 2437                      Matcher::regName[src_first]);
 2438 #endif
 2439         }
 2440       } else {
 2441         // 32-bit
 2442         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2443         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2444         if (masm) {
 2445           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2446 #ifndef PRODUCT
 2447         } else {
 2448           st->print("movdl   %s, %s\t# spill",
 2449                      Matcher::regName[dst_first],
 2450                      Matcher::regName[src_first]);
 2451 #endif
 2452         }
 2453       }
 2454       return 0;
 2455     } else if (dst_first_rc == rc_float) {
 2456       // xmm -> xmm
 2457       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2458           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2459         // 64-bit
 2460         if (masm) {
 2461           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2462 #ifndef PRODUCT
 2463         } else {
 2464           st->print("%s  %s, %s\t# spill",
 2465                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2466                      Matcher::regName[dst_first],
 2467                      Matcher::regName[src_first]);
 2468 #endif
 2469         }
 2470       } else {
 2471         // 32-bit
 2472         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2473         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2474         if (masm) {
 2475           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2476 #ifndef PRODUCT
 2477         } else {
 2478           st->print("%s  %s, %s\t# spill",
 2479                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2480                      Matcher::regName[dst_first],
 2481                      Matcher::regName[src_first]);
 2482 #endif
 2483         }
 2484       }
 2485       return 0;
 2486     } else if (dst_first_rc == rc_kreg) {
 2487       assert(false, "Illegal spilling");
 2488       return 0;
 2489     }
 2490   } else if (src_first_rc == rc_kreg) {
 2491     if (dst_first_rc == rc_stack) {
 2492       // kreg -> mem
 2493       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2494           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2495         // 64-bit
 2496         int offset = ra_->reg2offset(dst_first);
 2497         if (masm) {
 2498           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2499 #ifndef PRODUCT
 2500         } else {
 2501           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2502                      offset,
 2503                      Matcher::regName[src_first]);
 2504 #endif
 2505         }
 2506       }
 2507       return 0;
 2508     } else if (dst_first_rc == rc_int) {
 2509       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2510           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2511         // 64-bit
 2512         if (masm) {
 2513           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2514 #ifndef PRODUCT
 2515         } else {
 2516           st->print("kmovq   %s, %s\t# spill",
 2517                      Matcher::regName[dst_first],
 2518                      Matcher::regName[src_first]);
 2519 #endif
 2520         }
 2521       }
 2522       Unimplemented();
 2523       return 0;
 2524     } else if (dst_first_rc == rc_kreg) {
 2525       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2526           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2527         // 64-bit
 2528         if (masm) {
 2529           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2530 #ifndef PRODUCT
 2531         } else {
 2532           st->print("kmovq   %s, %s\t# spill",
 2533                      Matcher::regName[dst_first],
 2534                      Matcher::regName[src_first]);
 2535 #endif
 2536         }
 2537       }
 2538       return 0;
 2539     } else if (dst_first_rc == rc_float) {
 2540       assert(false, "Illegal spill");
 2541       return 0;
 2542     }
 2543   }
 2544 
 2545   assert(0, "unhandled register class combination in spill copy");
 2546   Unimplemented();
 2547   return 0;
 2548 }
 2549 
 2550 #ifndef PRODUCT
 2551 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2552   implementation(nullptr, ra_, false, st);
 2553 }
 2554 #endif
 2555 
 2556 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2557   implementation(masm, ra_, false, nullptr);
 2558 }
 2559 
 2560 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2561   return MachNode::size(ra_);
 2562 }
 2563 
 2564 //=============================================================================
 2565 #ifndef PRODUCT
 2566 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2567 {
 2568   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2569   int reg = ra_->get_reg_first(this);
 2570   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2571             Matcher::regName[reg], offset);
 2572 }
 2573 #endif
 2574 
 2575 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2576 {
 2577   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2578   int reg = ra_->get_encode(this);
 2579 
 2580   __ lea(as_Register(reg), Address(rsp, offset));
 2581 }
 2582 
 2583 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2584 {
 2585   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
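        // lea reg, [rsp + offset] encodes as a 1-byte REX (or 2-byte REX2) prefix
        // plus opcode, ModRM, SIB, and a disp8 (offset < 0x80) or disp32 displacement.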
 2586   if (ra_->get_encode(this) > 15) {
 2587     return (offset < 0x80) ? 6 : 9; // REX2
 2588   } else {
 2589     return (offset < 0x80) ? 5 : 8; // REX
 2590   }
 2591 }
 2592 
 2593 //=============================================================================
 2594 #ifndef PRODUCT
 2595 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2596 {
 2597   st->print_cr("MachVEPNode");
 2598 }
 2599 #endif
 2600 
 2601 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2602 {
 2603   CodeBuffer* cbuf = masm->code();
 2604   uint insts_size = cbuf->insts_size();
 2605   if (!_verified) {
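          // Unverified entry point: emit only the inline cache check.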
 2606     __ ic_check(1);
 2607   } else {
 2608     if (ra_->C->stub_function() == nullptr) {
 2609       // Emit the entry barrier in a temporary frame before unpacking because
 2610       // it can deopt, which would require packing the scalarized args again.
 2611       __ verified_entry(ra_->C, 0);
 2612       __ entry_barrier();
 2613       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2614       __ remove_frame(initial_framesize, false);
 2615     }
 2616     // Unpack inline type args passed as oop and then jump to
 2617     // the verified entry point (skipping the unverified entry).
 2618     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2619     // Emit code for verified entry and save increment for stack repair on return
 2620     __ verified_entry(ra_->C, sp_inc);
 2621     if (Compile::current()->output()->in_scratch_emit_size()) {
 2622       Label dummy_verified_entry;
 2623       __ jmp(dummy_verified_entry);
 2624     } else {
 2625       __ jmp(*_verified_entry);
 2626     }
 2627   }
 2628   /* WARNING: these NOPs are critical so that the verified entry point is properly
 2629      4-byte aligned for patching by NativeJump::patch_verified_entry() */
 2630   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 2631   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 2632   if (nops_cnt > 0) {
 2633     __ nop(nops_cnt);
 2634   }
 2635 }
 2636 
 2637 //=============================================================================
 2638 #ifndef PRODUCT
 2639 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2640 {
 2641   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2642   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2643   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2644 }
 2645 #endif
 2646 
 2647 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2648 {
 2649   __ ic_check(InteriorEntryAlignment);
 2650 }
 2651 
 2652 
 2653 //=============================================================================
 2654 
 2655 bool Matcher::supports_vector_calling_convention(void) {
 2656   return EnableVectorSupport;
 2657 }
 2658 
 2659 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2660   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2661 }
 2662 
 2663 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2664   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2665 }
 2666 
 2667 #ifdef ASSERT
 2668 static bool is_ndd_demotable(const MachNode* mdef) {
 2669   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2670 }
 2671 #endif
 2672 
 2673 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2674                                             int oper_index) {
 2675   if (mdef == nullptr) {
 2676     return false;
 2677   }
 2678 
 2679   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2680       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2681     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2682     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2683     return false;
 2684   }
 2685 
 2686   // A complex memory operand covers multiple incoming edges needed for
 2687   // address computation. Biasing the def towards any address component will not
 2688   // result in NDD demotion by the assembler.
 2689   if (mdef->operand_num_edges(oper_index) != 1) {
 2690     return false;
 2691   }
 2692 
 2693   // Demotion candidate must be register mask compatible with definition.
 2694   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2695   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2696     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2697     return false;
 2698   }
 2699 
 2700   switch (oper_index) {
 2701   // The first operand of a MachNode matched by an Intel APX NDD selection
 2702   // pattern can share its assigned register with the definition operand if
 2703   // their live ranges do not overlap. In that case the instruction can be
 2704   // demoted to a legacy map0/map1 encoding by replacing its 4-byte extended
 2705   // EVEX prefix with a shorter REX/REX2 prefix. Demotion candidates are
 2706   // decorated with a special flag by the instruction selector.
 2707   case 1:
 2708     return is_ndd_demotable_opr1(mdef);
 2709 
 2710   // The definition operand of a commutative operation can be biased towards
 2711   // the second operand.
 2712   case 2:
 2713     return is_ndd_demotable_opr2(mdef);
 2714 
 2715   // The current scheme only selects up to two biasing candidates.
 2716   default:
 2717     assert(false, "unhandled operand index: %s", mdef->Name());
 2718     break;
 2719   }
 2720 
 2721   return false;
 2722 }
 2723 
 2724 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2725   assert(EnableVectorSupport, "sanity");
 2726   int lo = XMM0_num;
 2727   int hi = XMM0b_num;
 2728   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2729   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2730   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2731   return OptoRegPair(hi, lo);
 2732 }
 2733 
 2734 // Is this branch offset short enough that a short branch can be used?
 2735 //
 2736 // NOTE: If the platform does not provide any short branch variants, then
 2737 //       this method should return false for offset 0.
 2738 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2739   // The passed offset is relative to the address of the branch.
 2740   // On x86 a branch displacement is calculated relative to the address
 2741   // of the next instruction.
 2742   offset -= br_size;
 2743 
 2744   // The short version of jmpConUCF2 contains multiple branches,
 2745   // making its reach slightly shorter.
 2746   if (rule == jmpConUCF2_rule)
 2747     return (-126 <= offset && offset <= 125);
 2748   return (-128 <= offset && offset <= 127);
 2749 }
 2750 
 2751 #ifdef ASSERT
 2752 // Return whether or not this register is ever used as an argument.
 2753 bool Matcher::can_be_java_arg(int reg)
 2754 {
 2755   return
 2756     reg ==  RDI_num || reg == RDI_H_num ||
 2757     reg ==  RSI_num || reg == RSI_H_num ||
 2758     reg ==  RDX_num || reg == RDX_H_num ||
 2759     reg ==  RCX_num || reg == RCX_H_num ||
 2760     reg ==   R8_num || reg ==  R8_H_num ||
 2761     reg ==   R9_num || reg ==  R9_H_num ||
 2762     reg ==  R12_num || reg == R12_H_num ||
 2763     reg == XMM0_num || reg == XMM0b_num ||
 2764     reg == XMM1_num || reg == XMM1b_num ||
 2765     reg == XMM2_num || reg == XMM2b_num ||
 2766     reg == XMM3_num || reg == XMM3b_num ||
 2767     reg == XMM4_num || reg == XMM4b_num ||
 2768     reg == XMM5_num || reg == XMM5b_num ||
 2769     reg == XMM6_num || reg == XMM6b_num ||
 2770     reg == XMM7_num || reg == XMM7b_num;
 2771 }
 2772 #endif
 2773 
 2774 uint Matcher::int_pressure_limit()
 2775 {
 2776   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2777 }
 2778 
 2779 uint Matcher::float_pressure_limit()
 2780 {
 2781   // After experimenting with different values, the following default threshold
 2782   // works best for LCM's register pressure scheduling on x64.
 2783   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2784   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2785   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2786 }
 2787 
 2788 // Register for DIVI projection of divmodI
 2789 const RegMask& Matcher::divI_proj_mask() {
 2790   return INT_RAX_REG_mask();
 2791 }
 2792 
 2793 // Register for MODI projection of divmodI
 2794 const RegMask& Matcher::modI_proj_mask() {
 2795   return INT_RDX_REG_mask();
 2796 }
 2797 
 2798 // Register for DIVL projection of divmodL
 2799 const RegMask& Matcher::divL_proj_mask() {
 2800   return LONG_RAX_REG_mask();
 2801 }
 2802 
 2803 // Register for MODL projection of divmodL
 2804 const RegMask& Matcher::modL_proj_mask() {
 2805   return LONG_RDX_REG_mask();
 2806 }
 2807 
 2808 %}
 2809 
 2810 source_hpp %{
 2811 // Header information of the source block.
 2812 // Method declarations/definitions which are used outside
 2813 // the ad-scope can conveniently be defined here.
 2814 //
 2815 // To keep related declarations/definitions/uses close together,
 2816 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2817 
 2818 #include "runtime/vm_version.hpp"
 2819 
 2820 class NativeJump;
 2821 
 2822 class CallStubImpl {
 2823 
 2824   //--------------------------------------------------------------
 2825   //---<  Used for optimization in Compile::shorten_branches  >---
 2826   //--------------------------------------------------------------
 2827 
 2828  public:
 2829   // Size of call trampoline stub.
 2830   static uint size_call_trampoline() {
 2831     return 0; // no call trampolines on this platform
 2832   }
 2833 
 2834   // Number of relocations needed by a call trampoline stub.
 2835   static uint reloc_call_trampoline() {
 2836     return 0; // no call trampolines on this platform
 2837   }
 2838 };
 2839 
 2840 class HandlerImpl {
 2841 
 2842  public:
 2843 
 2844   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2845 
 2846   static uint size_deopt_handler() {
 2847     // one call (5 bytes) and one short jmp (2 bytes).
 2848     return 7;
 2849   }
 2850 };
 2851 
 2852 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2853   switch (bytes) {
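          // 4- and 8-byte vectors are held in xmm registers and use 128-bit encodings.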
 2854     case  4: // fall-through
 2855     case  8: // fall-through
 2856     case 16: return Assembler::AVX_128bit;
 2857     case 32: return Assembler::AVX_256bit;
 2858     case 64: return Assembler::AVX_512bit;
 2859 
 2860     default: {
 2861       ShouldNotReachHere();
 2862       return Assembler::AVX_NoVec;
 2863     }
 2864   }
 2865 }
 2866 
 2867 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2868   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2869 }
 2870 
 2871 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2872   uint def_idx = use->operand_index(opnd);
 2873   Node* def = use->in(def_idx);
 2874   return vector_length_encoding(def);
 2875 }
 2876 
 2877 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2878   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2879          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2880 }
 2881 
 2882 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2883   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2884            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2885 }
 2886 
 2887 class Node::PD {
 2888 public:
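        // Platform-dependent node flags; the numbering continues from the last shared Node flag.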
 2889   enum NodeFlags : uint64_t {
 2890     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2891     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2892     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2893     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2894     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2895     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2896     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2897     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2898     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2899     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2900     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2901     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2902     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2903     _last_flag                = Flag_ndd_demotable_opr2
 2904   };
 2905 };
 2906 
 2907 %} // end source_hpp
 2908 
 2909 source %{
 2910 
 2911 #include "opto/addnode.hpp"
 2912 #include "c2_intelJccErratum_x86.hpp"
 2913 
 2914 void PhaseOutput::pd_perform_mach_node_analysis() {
 2915   if (VM_Version::has_intel_jcc_erratum()) {
 2916     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2917     _buf_sizes._code += extra_padding;
 2918   }
 2919 }
 2920 
 2921 int MachNode::pd_alignment_required() const {
 2922   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2923     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2924     return IntelJccErratum::largest_jcc_size() + 1;
 2925   } else {
 2926     return 1;
 2927   }
 2928 }
 2929 
 2930 int MachNode::compute_padding(int current_offset) const {
 2931   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2932     Compile* C = Compile::current();
 2933     PhaseOutput* output = C->output();
 2934     Block* block = output->block();
 2935     int index = output->index();
 2936     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2937   } else {
 2938     return 0;
 2939   }
 2940 }
 2941 
 2942 // Emit deopt handler code.
 2943 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2944 
 2945   // Note that the code buffer's insts_mark is always relative to insts.
 2946   // That's why we must use the macroassembler to generate a handler.
 2947   address base = __ start_a_stub(size_deopt_handler());
 2948   if (base == nullptr) {
 2949     ciEnv::current()->record_failure("CodeCache is full");
 2950     return 0;  // CodeBuffer::expand failed
 2951   }
 2952   int offset = __ offset();
 2953 
 2954   Label start;
 2955   __ bind(start);
 2956 
 2957   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2958 
 2959   int entry_offset = __ offset();
 2960 
 2961   __ jmp(start);
 2962 
 2963   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2964   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2965          "out of bounds read in post-call NOP check");
 2966   __ end_a_stub();
 2967   return entry_offset;
 2968 }
 2969 
 2970 static Assembler::Width widthForType(BasicType bt) {
 2971   if (bt == T_BYTE) {
 2972     return Assembler::B;
 2973   } else if (bt == T_SHORT) {
 2974     return Assembler::W;
 2975   } else if (bt == T_INT) {
 2976     return Assembler::D;
 2977   } else {
 2978     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2979     return Assembler::Q;
 2980   }
 2981 }
 2982 
 2983 //=============================================================================
 2984 
 2985   // Float masks come from different places depending on platform.
 2986   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2987   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2988   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2989   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2990   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2991   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2992   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2993   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2994   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2995   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2996   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2997   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2998   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2999   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 3000   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 3001   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 3002   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 3003   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 3004   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 3005 
 3006 //=============================================================================
 3007 bool Matcher::match_rule_supported(int opcode) {
 3008   if (!has_match_rule(opcode)) {
 3009     return false; // no match rule present
 3010   }
 3011   switch (opcode) {
 3012     case Op_AbsVL:
 3013     case Op_StoreVectorScatter:
 3014       if (UseAVX < 3) {
 3015         return false;
 3016       }
 3017       break;
 3018     case Op_PopCountI:
 3019     case Op_PopCountL:
 3020       if (!UsePopCountInstruction) {
 3021         return false;
 3022       }
 3023       break;
 3024     case Op_PopCountVI:
 3025       if (UseAVX < 2) {
 3026         return false;
 3027       }
 3028       break;
 3029     case Op_CompressV:
 3030     case Op_ExpandV:
 3031     case Op_PopCountVL:
 3032       if (UseAVX < 2) {
 3033         return false;
 3034       }
 3035       break;
 3036     case Op_MulVI:
 3037       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3038         return false;
 3039       }
 3040       break;
 3041     case Op_MulVL:
 3042       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3043         return false;
 3044       }
 3045       break;
 3046     case Op_MulReductionVL:
 3047       if (VM_Version::supports_avx512dq() == false) {
 3048         return false;
 3049       }
 3050       break;
 3051     case Op_AbsVB:
 3052     case Op_AbsVS:
 3053     case Op_AbsVI:
 3054     case Op_AddReductionVI:
 3055     case Op_AndReductionV:
 3056     case Op_OrReductionV:
 3057     case Op_XorReductionV:
 3058       if (UseSSE < 3) { // requires at least SSSE3
 3059         return false;
 3060       }
 3061       break;
 3062     case Op_MaxHF:
 3063     case Op_MinHF:
 3064       if (!VM_Version::supports_avx512vlbw()) {
 3065         return false;
 3066       }  // fallthrough
 3067     case Op_AddHF:
 3068     case Op_DivHF:
 3069     case Op_FmaHF:
 3070     case Op_MulHF:
 3071     case Op_ReinterpretS2HF:
 3072     case Op_ReinterpretHF2S:
 3073     case Op_SubHF:
 3074     case Op_SqrtHF:
 3075       if (!VM_Version::supports_avx512_fp16()) {
 3076         return false;
 3077       }
 3078       break;
 3079     case Op_VectorLoadShuffle:
 3080     case Op_VectorRearrange:
 3081     case Op_MulReductionVI:
 3082       if (UseSSE < 4) { // requires at least SSE4
 3083         return false;
 3084       }
 3085       break;
 3086     case Op_IsInfiniteF:
 3087     case Op_IsInfiniteD:
 3088       if (!VM_Version::supports_avx512dq()) {
 3089         return false;
 3090       }
 3091       break;
 3092     case Op_SqrtVD:
 3093     case Op_SqrtVF:
 3094     case Op_VectorMaskCmp:
 3095     case Op_VectorCastB2X:
 3096     case Op_VectorCastS2X:
 3097     case Op_VectorCastI2X:
 3098     case Op_VectorCastL2X:
 3099     case Op_VectorCastF2X:
 3100     case Op_VectorCastD2X:
 3101     case Op_VectorUCastB2X:
 3102     case Op_VectorUCastS2X:
 3103     case Op_VectorUCastI2X:
 3104     case Op_VectorMaskCast:
 3105       if (UseAVX < 1) { // enabled for AVX only
 3106         return false;
 3107       }
 3108       break;
 3109     case Op_PopulateIndex:
 3110       if (UseAVX < 2) {
 3111         return false;
 3112       }
 3113       break;
 3114     case Op_RoundVF:
 3115       if (UseAVX < 2) { // enabled for AVX2 only
 3116         return false;
 3117       }
 3118       break;
 3119     case Op_RoundVD:
 3120       if (UseAVX < 3) {
 3121         return false;  // enabled for AVX3 only
 3122       }
 3123       break;
 3124     case Op_CompareAndSwapL:
 3125     case Op_CompareAndSwapP:
 3126       break;
 3127     case Op_StrIndexOf:
 3128       if (!UseSSE42Intrinsics) {
 3129         return false;
 3130       }
 3131       break;
 3132     case Op_StrIndexOfChar:
 3133       if (!UseSSE42Intrinsics) {
 3134         return false;
 3135       }
 3136       break;
 3137     case Op_OnSpinWait:
 3138       if (VM_Version::supports_on_spin_wait() == false) {
 3139         return false;
 3140       }
 3141       break;
 3142     case Op_MulVB:
 3143     case Op_LShiftVB:
 3144     case Op_RShiftVB:
 3145     case Op_URShiftVB:
 3146     case Op_VectorInsert:
 3147     case Op_VectorLoadMask:
 3148     case Op_VectorStoreMask:
 3149     case Op_VectorBlend:
 3150       if (UseSSE < 4) {
 3151         return false;
 3152       }
 3153       break;
 3154     case Op_MaxD:
 3155     case Op_MaxF:
 3156     case Op_MinD:
 3157     case Op_MinF:
 3158       if (UseAVX < 1) { // enabled for AVX only
 3159         return false;
 3160       }
 3161       break;
 3162     case Op_CacheWB:
 3163     case Op_CacheWBPreSync:
 3164     case Op_CacheWBPostSync:
 3165       if (!VM_Version::supports_data_cache_line_flush()) {
 3166         return false;
 3167       }
 3168       break;
 3169     case Op_ExtractB:
 3170     case Op_ExtractL:
 3171     case Op_ExtractI:
 3172     case Op_RoundDoubleMode:
 3173       if (UseSSE < 4) {
 3174         return false;
 3175       }
 3176       break;
 3177     case Op_RoundDoubleModeV:
 3178       if (VM_Version::supports_avx() == false) {
 3179         return false; // 128bit vroundpd is not available
 3180       }
 3181       break;
 3182     case Op_LoadVectorGather:
 3183     case Op_LoadVectorGatherMasked:
 3184       if (UseAVX < 2) {
 3185         return false;
 3186       }
 3187       break;
 3188     case Op_FmaF:
 3189     case Op_FmaD:
 3190     case Op_FmaVD:
 3191     case Op_FmaVF:
 3192       if (!UseFMA) {
 3193         return false;
 3194       }
 3195       break;
 3196     case Op_MacroLogicV:
 3197       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3198         return false;
 3199       }
 3200       break;
 3201 
 3202     case Op_VectorCmpMasked:
 3203     case Op_VectorMaskGen:
 3204       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3205         return false;
 3206       }
 3207       break;
 3208     case Op_VectorMaskFirstTrue:
 3209     case Op_VectorMaskLastTrue:
 3210     case Op_VectorMaskTrueCount:
 3211     case Op_VectorMaskToLong:
 3212       if (UseAVX < 1) {
 3213          return false;
 3214       }
 3215       break;
 3216     case Op_RoundF:
 3217     case Op_RoundD:
 3218       break;
 3219     case Op_CopySignD:
 3220     case Op_CopySignF:
 3221       if (UseAVX < 3)  {
 3222         return false;
 3223       }
 3224       if (!VM_Version::supports_avx512vl()) {
 3225         return false;
 3226       }
 3227       break;
 3228     case Op_CompressBits:
 3229     case Op_ExpandBits:
 3230       if (!VM_Version::supports_bmi2()) {
 3231         return false;
 3232       }
 3233       break;
 3234     case Op_CompressM:
 3235       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3236         return false;
 3237       }
 3238       break;
 3239     case Op_ConvF2HF:
 3240     case Op_ConvHF2F:
 3241       if (!VM_Version::supports_float16()) {
 3242         return false;
 3243       }
 3244       break;
 3245     case Op_VectorCastF2HF:
 3246     case Op_VectorCastHF2F:
 3247       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3248         return false;
 3249       }
 3250       break;
 3251   }
 3252   return true;  // Match rules are supported by default.
 3253 }
 3254 
 3255 //------------------------------------------------------------------------
 3256 
 3257 static inline bool is_pop_count_instr_target(BasicType bt) {
 3258   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3259          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3260 }
 3261 
 3262 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3263   return match_rule_supported_vector(opcode, vlen, bt);
 3264 }
 3265 
 3266 // Identify extra cases that we might want to provide match rules for vector nodes and
 3267 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3268 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3269   if (!match_rule_supported(opcode)) {
 3270     return false;
 3271   }
 3272   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3273   //   * SSE2 supports 128bit vectors for all types;
 3274   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3275   //   * AVX2 supports 256bit vectors for all types;
 3276   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3277   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3278   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3279   // And MaxVectorSize is taken into account as well.
 3280   if (!vector_size_supported(bt, vlen)) {
 3281     return false;
 3282   }
 3283   // Special cases which require vector length follow:
 3284   //   * implementation limitations
 3285   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3286   //   * 128bit vroundpd instruction is present only in AVX1
 3287   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3288   switch (opcode) {
 3289     case Op_MaxVHF:
 3290     case Op_MinVHF:
 3291       if (!VM_Version::supports_avx512bw()) {
 3292         return false;
 3293       }
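            // fallthrough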
 3294     case Op_AddVHF:
 3295     case Op_DivVHF:
 3296     case Op_FmaVHF:
 3297     case Op_MulVHF:
 3298     case Op_SubVHF:
 3299     case Op_SqrtVHF:
 3300       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3301         return false;
 3302       }
 3303       if (!VM_Version::supports_avx512_fp16()) {
 3304         return false;
 3305       }
 3306       break;
 3307     case Op_AbsVF:
 3308     case Op_NegVF:
 3309       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3310         return false; // 512bit vandps and vxorps are not available
 3311       }
 3312       break;
 3313     case Op_AbsVD:
 3314     case Op_NegVD:
 3315       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3316         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3317       }
 3318       break;
 3319     case Op_RotateRightV:
 3320     case Op_RotateLeftV:
 3321       if (bt != T_INT && bt != T_LONG) {
 3322         return false;
 3323       } // fallthrough
 3324     case Op_MacroLogicV:
 3325       if (!VM_Version::supports_evex() ||
 3326           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3327         return false;
 3328       }
 3329       break;
 3330     case Op_ClearArray:
 3331     case Op_VectorMaskGen:
 3332     case Op_VectorCmpMasked:
 3333       if (!VM_Version::supports_avx512bw()) {
 3334         return false;
 3335       }
 3336       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3337         return false;
 3338       }
 3339       break;
 3340     case Op_LoadVectorMasked:
 3341     case Op_StoreVectorMasked:
 3342       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3343         return false;
 3344       }
 3345       break;
 3346     case Op_UMinV:
 3347     case Op_UMaxV:
 3348       if (UseAVX == 0) {
 3349         return false;
 3350       }
 3351       break;
 3352     case Op_UMinReductionV:
 3353     case Op_UMaxReductionV:
 3354       if (UseAVX == 0) {
 3355         return false;
 3356       }
 3357       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3358         return false;
 3359       }
 3360       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3361         return false;
 3362       }
 3363       break;
 3364     case Op_MaxV:
 3365     case Op_MinV:
 3366       if (UseSSE < 4 && is_integral_type(bt)) {
 3367         return false;
 3368       }
 3369       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3370           // Float/Double intrinsics are enabled for AVX family currently.
 3371           if (UseAVX == 0) {
 3372             return false;
 3373           }
 3374           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3375             return false;
 3376           }
 3377       }
 3378       break;
 3379     case Op_CallLeafVector:
 3380       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3381         return false;
 3382       }
 3383       break;
 3384     case Op_AddReductionVI:
 3385       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3386         return false;
 3387       }
 3388       // fallthrough
 3389     case Op_AndReductionV:
 3390     case Op_OrReductionV:
 3391     case Op_XorReductionV:
 3392       if (is_subword_type(bt) && (UseSSE < 4)) {
 3393         return false;
 3394       }
 3395       break;
 3396     case Op_MinReductionV:
 3397     case Op_MaxReductionV:
 3398       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3399         return false;
 3400       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3401         return false;
 3402       }
 3403       // Float/Double intrinsics enabled for AVX family.
 3404       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3405         return false;
 3406       }
 3407       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3408         return false;
 3409       }
 3410       break;
 3411     case Op_VectorBlend:
 3412       if (UseAVX == 0 && size_in_bits < 128) {
 3413         return false;
 3414       }
 3415       break;
 3416     case Op_VectorTest:
 3417       if (UseSSE < 4) {
 3418         return false; // Implementation limitation
 3419       } else if (size_in_bits < 32) {
 3420         return false; // Implementation limitation
 3421       }
 3422       break;
 3423     case Op_VectorLoadShuffle:
 3424     case Op_VectorRearrange:
 3425       if (vlen == 2) {
 3426         return false; // Implementation limitation due to how shuffle is loaded
 3427       } else if (size_in_bits == 256 && UseAVX < 2) {
 3428         return false; // Implementation limitation
 3429       }
 3430       break;
 3431     case Op_VectorLoadMask:
 3432     case Op_VectorMaskCast:
 3433       if (size_in_bits == 256 && UseAVX < 2) {
 3434         return false; // Implementation limitation
 3435       }
 3436       // fallthrough
 3437     case Op_VectorStoreMask:
 3438       if (vlen == 2) {
 3439         return false; // Implementation limitation
 3440       }
 3441       break;
 3442     case Op_PopulateIndex:
 3443       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3444         return false;
 3445       }
 3446       break;
 3447     case Op_VectorCastB2X:
 3448     case Op_VectorCastS2X:
 3449     case Op_VectorCastI2X:
 3450       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3451         return false;
 3452       }
 3453       break;
 3454     case Op_VectorCastL2X:
 3455       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3456         return false;
 3457       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3458         return false;
 3459       }
 3460       break;
 3461     case Op_VectorCastF2X: {
 3462         // As per JLS section 5.1.3, narrowing conversions to sub-word types
 3463         // happen after an intermediate conversion to integer, and the special
 3464         // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3465         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3466         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3467           return false;
 3468         }
 3469       }
 3470       // fallthrough
 3471     case Op_VectorCastD2X:
 3472       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3473         return false;
 3474       }
 3475       break;
 3476     case Op_VectorCastF2HF:
 3477     case Op_VectorCastHF2F:
 3478       if (!VM_Version::supports_f16c() &&
 3479          ((!VM_Version::supports_evex() ||
 3480          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3481         return false;
 3482       }
 3483       break;
 3484     case Op_RoundVD:
 3485       if (!VM_Version::supports_avx512dq()) {
 3486         return false;
 3487       }
 3488       break;
 3489     case Op_MulReductionVI:
 3490       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3491         return false;
 3492       }
 3493       break;
 3494     case Op_LoadVectorGatherMasked:
 3495       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3496         return false;
 3497       }
 3498       if (is_subword_type(bt) &&
 3499          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3500           (size_in_bits < 64)                                      ||
 3501           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3502         return false;
 3503       }
 3504       break;
 3505     case Op_StoreVectorScatterMasked:
 3506     case Op_StoreVectorScatter:
 3507       if (is_subword_type(bt)) {
 3508         return false;
 3509       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3510         return false;
 3511       }
 3512       // fallthrough
 3513     case Op_LoadVectorGather:
 3514       if (!is_subword_type(bt) && size_in_bits == 64) {
 3515         return false;
 3516       }
 3517       if (is_subword_type(bt) && size_in_bits < 64) {
 3518         return false;
 3519       }
 3520       break;
 3521     case Op_SaturatingAddV:
 3522     case Op_SaturatingSubV:
 3523       if (UseAVX < 1) {
 3524         return false; // Implementation limitation
 3525       }
 3526       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3527         return false;
 3528       }
 3529       break;
 3530     case Op_SelectFromTwoVector:
 3531        if (size_in_bits < 128) {
 3532          return false;
 3533        }
 3534        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3535          return false;
 3536        }
 3537        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3538          return false;
 3539        }
 3540        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3541          return false;
 3542        }
 3543        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3544          return false;
 3545        }
 3546        break;
 3547     case Op_MaskAll:
 3548       if (!VM_Version::supports_evex()) {
 3549         return false;
 3550       }
 3551       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3552         return false;
 3553       }
 3554       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3555         return false;
 3556       }
 3557       break;
 3558     case Op_VectorMaskCmp:
 3559       if (vlen < 2 || size_in_bits < 32) {
 3560         return false;
 3561       }
 3562       break;
 3563     case Op_CompressM:
 3564       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3565         return false;
 3566       }
 3567       break;
 3568     case Op_CompressV:
 3569     case Op_ExpandV:
 3570       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3571         return false;
 3572       }
 3573       if (size_in_bits < 128) {
 3574         return false;
 3575       }
 3576     case Op_VectorLongToMask:
 3577       if (UseAVX < 1) {
 3578         return false;
 3579       }
 3580       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3581         return false;
 3582       }
 3583       break;
 3584     case Op_SignumVD:
 3585     case Op_SignumVF:
 3586       if (UseAVX < 1) {
 3587         return false;
 3588       }
 3589       break;
 3590     case Op_PopCountVI:
 3591     case Op_PopCountVL: {
 3592         if (!is_pop_count_instr_target(bt) &&
 3593             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3594           return false;
 3595         }
 3596       }
 3597       break;
 3598     case Op_ReverseV:
 3599     case Op_ReverseBytesV:
 3600       if (UseAVX < 2) {
 3601         return false;
 3602       }
 3603       break;
 3604     case Op_CountTrailingZerosV:
 3605     case Op_CountLeadingZerosV:
 3606       if (UseAVX < 2) {
 3607         return false;
 3608       }
 3609       break;
 3610   }
 3611   return true;  // Per default match rules are supported.
 3612 }
 3613 
 3614 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3615   // The ADLC-based match_rule_supported routine checks for the existence of a pattern
 3616   // based on the IR opcode. Most of the unary/binary/ternary masked operations share the
 3617   // IR nodes of their non-masked counterparts, with the mask edge being the differentiator.
 3618   // This routine does a strict check on the existence of masked operation patterns
 3619   // by returning false for all other opcodes apart from the
 3620   // ones whose masked instruction patterns are defined in this file.
 3621   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3622     return false;
 3623   }
 3624 
 3625   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
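        // Masked operations on vectors shorter than 512 bits require AVX512VL.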
 3626   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3627     return false;
 3628   }
 3629   switch (opcode) {
 3630     // Unary masked operations
 3631     case Op_AbsVB:
 3632     case Op_AbsVS:
 3633       if (!VM_Version::supports_avx512bw()) {
 3634         return false;  // Implementation limitation
 3635       }
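            // fallthrough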
 3636     case Op_AbsVI:
 3637     case Op_AbsVL:
 3638       return true;
 3639 
 3640     // Ternary masked operations
 3641     case Op_FmaVF:
 3642     case Op_FmaVD:
 3643       return true;
 3644 
 3645     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3647         return false;
 3648       }
 3649       return true;
 3650 
 3651     // Binary masked operations
 3652     case Op_AddVB:
 3653     case Op_AddVS:
 3654     case Op_SubVB:
 3655     case Op_SubVS:
 3656     case Op_MulVS:
 3657     case Op_LShiftVS:
 3658     case Op_RShiftVS:
 3659     case Op_URShiftVS:
 3660       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3661       if (!VM_Version::supports_avx512bw()) {
 3662         return false;  // Implementation limitation
 3663       }
 3664       return true;
 3665 
 3666     case Op_MulVL:
 3667       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3668       if (!VM_Version::supports_avx512dq()) {
 3669         return false;  // Implementation limitation
 3670       }
 3671       return true;
 3672 
 3673     case Op_AndV:
 3674     case Op_OrV:
 3675     case Op_XorV:
 3676     case Op_RotateRightV:
 3677     case Op_RotateLeftV:
 3678       if (bt != T_INT && bt != T_LONG) {
 3679         return false; // Implementation limitation
 3680       }
 3681       return true;
 3682 
 3683     case Op_VectorLoadMask:
 3684       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3685       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3686         return false;
 3687       }
 3688       return true;
 3689 
 3690     case Op_AddVI:
 3691     case Op_AddVL:
 3692     case Op_AddVF:
 3693     case Op_AddVD:
 3694     case Op_SubVI:
 3695     case Op_SubVL:
 3696     case Op_SubVF:
 3697     case Op_SubVD:
 3698     case Op_MulVI:
 3699     case Op_MulVF:
 3700     case Op_MulVD:
 3701     case Op_DivVF:
 3702     case Op_DivVD:
 3703     case Op_SqrtVF:
 3704     case Op_SqrtVD:
 3705     case Op_LShiftVI:
 3706     case Op_LShiftVL:
 3707     case Op_RShiftVI:
 3708     case Op_RShiftVL:
 3709     case Op_URShiftVI:
 3710     case Op_URShiftVL:
 3711     case Op_LoadVectorMasked:
 3712     case Op_StoreVectorMasked:
 3713     case Op_LoadVectorGatherMasked:
 3714     case Op_StoreVectorScatterMasked:
 3715       return true;
 3716 
 3717     case Op_UMinV:
 3718     case Op_UMaxV:
 3719       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3720         return false;
 3721       } // fallthrough
 3722     case Op_MaxV:
 3723     case Op_MinV:
 3724       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3725         return false; // Implementation limitation
 3726       }
 3727       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       return true;
 3731     case Op_SaturatingAddV:
 3732     case Op_SaturatingSubV:
 3733       if (!is_subword_type(bt)) {
 3734         return false;
 3735       }
 3736       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3737         return false; // Implementation limitation
 3738       }
 3739       return true;
 3740 
 3741     case Op_VectorMaskCmp:
 3742       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3743         return false; // Implementation limitation
 3744       }
 3745       return true;
 3746 
 3747     case Op_VectorRearrange:
 3748       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3749         return false; // Implementation limitation
 3750       }
 3751       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3752         return false; // Implementation limitation
 3753       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3754         return false; // Implementation limitation
 3755       }
 3756       return true;
 3757 
 3758     // Binary Logical operations
 3759     case Op_AndVMask:
 3760     case Op_OrVMask:
 3761     case Op_XorVMask:
 3762       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3763         return false; // Implementation limitation
 3764       }
 3765       return true;
 3766 
 3767     case Op_PopCountVI:
 3768     case Op_PopCountVL:
 3769       if (!is_pop_count_instr_target(bt)) {
 3770         return false;
 3771       }
 3772       return true;
 3773 
 3774     case Op_MaskAll:
 3775       return true;
 3776 
 3777     case Op_CountLeadingZerosV:
 3778       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3779         return true;
 3780       }
 3781     default:
 3782       return false;
 3783   }
 3784 }
 3785 
 3786 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3787   return false;
 3788 }
 3789 
 3790 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3791 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3792   switch (elem_bt) {
 3793     case T_BYTE:  return false;
 3794     case T_SHORT: return !VM_Version::supports_avx512bw();
 3795     case T_INT:   return !VM_Version::supports_avx();
 3796     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3797     default:
 3798       ShouldNotReachHere();
 3799       return false;
 3800   }
 3801 }
 3802 
 3803 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3804   // Prefer predicate if the mask type is "TypeVectMask".
 3805   return vt->isa_vectmask() != nullptr;
 3806 }
 3807 
 3808 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3809   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3810   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3811   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3812       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3813     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3814     return new legVecZOper();
 3815   }
 3816   if (legacy) {
 3817     switch (ideal_reg) {
 3818       case Op_VecS: return new legVecSOper();
 3819       case Op_VecD: return new legVecDOper();
 3820       case Op_VecX: return new legVecXOper();
 3821       case Op_VecY: return new legVecYOper();
 3822       case Op_VecZ: return new legVecZOper();
 3823     }
 3824   } else {
 3825     switch (ideal_reg) {
 3826       case Op_VecS: return new vecSOper();
 3827       case Op_VecD: return new vecDOper();
 3828       case Op_VecX: return new vecXOper();
 3829       case Op_VecY: return new vecYOper();
 3830       case Op_VecZ: return new vecZOper();
 3831     }
 3832   }
 3833   ShouldNotReachHere();
 3834   return nullptr;
 3835 }
 3836 
 3837 bool Matcher::is_reg2reg_move(MachNode* m) {
 3838   switch (m->rule()) {
 3839     case MoveVec2Leg_rule:
 3840     case MoveLeg2Vec_rule:
 3841     case MoveF2VL_rule:
 3842     case MoveF2LEG_rule:
 3843     case MoveVL2F_rule:
 3844     case MoveLEG2F_rule:
 3845     case MoveD2VL_rule:
 3846     case MoveD2LEG_rule:
 3847     case MoveVL2D_rule:
 3848     case MoveLEG2D_rule:
 3849       return true;
 3850     default:
 3851       return false;
 3852   }
 3853 }
 3854 
 3855 bool Matcher::is_generic_vector(MachOper* opnd) {
 3856   switch (opnd->opcode()) {
 3857     case VEC:
 3858     case LEGVEC:
 3859       return true;
 3860     default:
 3861       return false;
 3862   }
 3863 }
 3864 
 3865 //------------------------------------------------------------------------
 3866 
 3867 const RegMask* Matcher::predicate_reg_mask(void) {
 3868   return &_VECTMASK_REG_mask;
 3869 }
 3870 
 3871 // Max vector size in bytes. 0 if not supported.
 3872 int Matcher::vector_width_in_bytes(BasicType bt) {
 3873   assert(is_java_primitive(bt), "only primitive type vectors");
 3874   // SSE2 supports 128bit vectors for all types.
 3875   // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
 3877   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
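  // e.g. UseAVX == 2 (AVX2) gives (1 << 2) * 8 = 32 bytes, UseAVX == 3 (EVEX)
  // gives 64 bytes; with SSE2 only (UseAVX <= 1) the width stays at 16 bytes.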
 3878   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3879   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3880     size = (UseAVX > 2) ? 64 : 32;
 3881   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3882     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3883   // Use flag to limit vector size.
 3884   size = MIN2(size,(int)MaxVectorSize);
 3885   // Minimum 2 values in vector (or 4 for bytes).
 3886   switch (bt) {
 3887   case T_DOUBLE:
 3888   case T_LONG:
 3889     if (size < 16) return 0;
 3890     break;
 3891   case T_FLOAT:
 3892   case T_INT:
 3893     if (size < 8) return 0;
 3894     break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
 3907   default:
 3908     ShouldNotReachHere();
 3909   }
 3910   return size;
 3911 }
 3912 
 3913 // Limits on vector size (number of elements) loaded into vector.
 3914 int Matcher::max_vector_size(const BasicType bt) {
 3915   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3916 }
 3917 int Matcher::min_vector_size(const BasicType bt) {
 3918   int max_size = max_vector_size(bt);
 3919   // Min size which can be loaded into vector is 4 bytes.
 3920   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
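  // i.e. at least 4 elements for byte-sized types, at least 2 elements otherwise.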
 3921   // Support for calling svml double64 vectors
 3922   if (bt == T_DOUBLE) {
 3923     size = 1;
 3924   }
 3925   return MIN2(size,max_size);
 3926 }
 3927 
 3928 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3929   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3930   // by default on Cascade Lake
 3931   if (VM_Version::is_default_intel_cascade_lake()) {
 3932     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3933   }
 3934   return Matcher::max_vector_size(bt);
 3935 }
 3936 
 3937 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3938   return -1;
 3939 }
 3940 
 3941 // Vector ideal reg corresponding to specified size in bytes
 3942 uint Matcher::vector_ideal_reg(int size) {
 3943   assert(MaxVectorSize >= size, "");
 3944   switch(size) {
 3945     case  4: return Op_VecS;
 3946     case  8: return Op_VecD;
 3947     case 16: return Op_VecX;
 3948     case 32: return Op_VecY;
 3949     case 64: return Op_VecZ;
 3950   }
 3951   ShouldNotReachHere();
 3952   return 0;
 3953 }
 3954 
 3955 // Check for shift by small constant as well
 3956 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3957   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3958       shift->in(2)->get_int() <= 3 &&
 3959       // Are there other uses besides address expressions?
 3960       !matcher->is_visited(shift)) {
 3961     address_visited.set(shift->_idx); // Flag as address_visited
 3962     mstack.push(shift->in(2), Matcher::Visit);
 3963     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3967     if (conv->Opcode() == Op_ConvI2L &&
 3968         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3969         // Are there other uses besides address expressions?
 3970         !matcher->is_visited(conv)) {
 3971       address_visited.set(conv->_idx); // Flag as address_visited
 3972       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3973     } else {
 3974       mstack.push(conv, Matcher::Pre_Visit);
 3975     }
 3976     return true;
 3977   }
 3978   return false;
 3979 }
 3980 
 3981 // This function identifies sub-graphs in which a 'load' node is
 3982 // input to two different nodes, and such that it can be matched
 3983 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
 3985 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3986 // refers to the same node.
 3987 //
 3988 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3989 // This is a temporary solution until we make DAGs expressible in ADL.
 3990 template<typename ConType>
 3991 class FusedPatternMatcher {
 3992   Node* _op1_node;
 3993   Node* _mop_node;
 3994   int _con_op;
 3995 
 3996   static int match_next(Node* n, int next_op, int next_op_idx) {
 3997     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3998       return -1;
 3999     }
 4000 
 4001     if (next_op_idx == -1) { // n is commutative, try rotations
 4002       if (n->in(1)->Opcode() == next_op) {
 4003         return 1;
 4004       } else if (n->in(2)->Opcode() == next_op) {
 4005         return 2;
 4006       }
 4007     } else {
 4008       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 4009       if (n->in(next_op_idx)->Opcode() == next_op) {
 4010         return next_op_idx;
 4011       }
 4012     }
 4013     return -1;
 4014   }
 4015 
 4016  public:
 4017   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4018     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4019 
 4020   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4021              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4022              typename ConType::NativeType con_value) {
 4023     if (_op1_node->Opcode() != op1) {
 4024       return false;
 4025     }
 4026     if (_mop_node->outcnt() > 2) {
 4027       return false;
 4028     }
 4029     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4030     if (op1_op2_idx == -1) {
 4031       return false;
 4032     }
 4033     // Memory operation must be the other edge
 4034     int op1_mop_idx = (op1_op2_idx & 1) + 1;
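    // (idx & 1) + 1 maps 1 -> 2 and 2 -> 1, i.e. it selects the other input edge.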
 4035 
 4036     // Check that the mop node is really what we want
 4037     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4038       Node* op2_node = _op1_node->in(op1_op2_idx);
 4039       if (op2_node->outcnt() > 1) {
 4040         return false;
 4041       }
 4042       assert(op2_node->Opcode() == op2, "Should be");
 4043       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4044       if (op2_con_idx == -1) {
 4045         return false;
 4046       }
 4047       // Memory operation must be the other edge
 4048       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4049       // Check that the memory operation is the same node
 4050       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4051         // Now check the constant
 4052         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4053         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4054           return true;
 4055         }
 4056       }
 4057     }
 4058     return false;
 4059   }
 4060 };
 4061 
 4062 static bool is_bmi_pattern(Node* n, Node* m) {
 4063   assert(UseBMI1Instructions, "sanity");
 4064   if (n != nullptr && m != nullptr) {
 4065     if (m->Opcode() == Op_LoadI) {
 4066       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
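      // blsi:   x & -x      -> (AndI (SubI 0 load) load)
      // blsr:   x & (x - 1) -> (AndI (AddI load -1) load)
      // blsmsk: x ^ (x - 1) -> (XorI (AddI load -1) load)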
 4067       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4068              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4069              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4070     } else if (m->Opcode() == Op_LoadL) {
 4071       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4072       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4073              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4074              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4075     }
 4076   }
 4077   return false;
 4078 }
 4079 
 4080 // Should the matcher clone input 'm' of node 'n'?
 4081 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4082   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4083   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4084     mstack.push(m, Visit);
 4085     return true;
 4086   }
 4087   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4088     mstack.push(m, Visit);           // m = ShiftCntV
 4089     return true;
 4090   }
 4091   if (is_encode_and_store_pattern(n, m)) {
 4092     mstack.push(m, Visit);
 4093     return true;
 4094   }
 4095   return false;
 4096 }
 4097 
 4098 // Should the Matcher clone shifts on addressing modes, expecting them
 4099 // to be subsumed into complex addressing expressions or compute them
 4100 // into registers?
 4101 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4102   Node *off = m->in(AddPNode::Offset);
 4103   if (off->is_Con()) {
 4104     address_visited.test_set(m->_idx); // Flag as address_visited
 4105     Node *adr = m->in(AddPNode::Address);
 4106 
 4107     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4108     // AtomicAdd is not an addressing expression.
 4109     // Cheap to find it by looking for screwy base.
 4110     if (adr->is_AddP() &&
 4111         !adr->in(AddPNode::Base)->is_top() &&
 4112         !adr->in(AddPNode::Offset)->is_Con() &&
 4113         off->get_long() == (int) (off->get_long()) && // immL32
 4114         // Are there other uses besides address expressions?
 4115         !is_visited(adr)) {
 4116       address_visited.set(adr->_idx); // Flag as address_visited
 4117       Node *shift = adr->in(AddPNode::Offset);
 4118       if (!clone_shift(shift, this, mstack, address_visited)) {
 4119         mstack.push(shift, Pre_Visit);
 4120       }
 4121       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4122       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4123     } else {
 4124       mstack.push(adr, Pre_Visit);
 4125     }
 4126 
 4127     // Clone X+offset as it also folds into most addressing expressions
 4128     mstack.push(off, Visit);
 4129     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4130     return true;
 4131   } else if (clone_shift(off, this, mstack, address_visited)) {
 4132     address_visited.test_set(m->_idx); // Flag as address_visited
 4133     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4134     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4135     return true;
 4136   }
 4137   return false;
 4138 }
 4139 
 4140 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4141   switch (bt) {
 4142     case BoolTest::eq:
 4143       return Assembler::eq;
 4144     case BoolTest::ne:
 4145       return Assembler::neq;
 4146     case BoolTest::le:
 4147     case BoolTest::ule:
 4148       return Assembler::le;
 4149     case BoolTest::ge:
 4150     case BoolTest::uge:
 4151       return Assembler::nlt;
 4152     case BoolTest::lt:
 4153     case BoolTest::ult:
 4154       return Assembler::lt;
 4155     case BoolTest::gt:
 4156     case BoolTest::ugt:
 4157       return Assembler::nle;
 4158     default : ShouldNotReachHere(); return Assembler::_false;
 4159   }
 4160 }
 4161 
 4162 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4163   switch (bt) {
 4164   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4165   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4166   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4167   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4168   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4169   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4170   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4171   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4172   }
 4173 }
 4174 
 4175 // Helper methods for MachSpillCopyNode::implementation().
 4176 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4177                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4178   assert(ireg == Op_VecS || // 32bit vector
 4179          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4180           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4181          "no non-adjacent vector moves" );
 4182   if (masm) {
 4183     switch (ireg) {
 4184     case Op_VecS: // copy whole register
 4185     case Op_VecD:
 4186     case Op_VecX:
 4187       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4188         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4189       } else {
 4190         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4191      }
 4192       break;
 4193     case Op_VecY:
 4194       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4195         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4196       } else {
 4197         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4198      }
 4199       break;
 4200     case Op_VecZ:
 4201       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4202       break;
 4203     default:
 4204       ShouldNotReachHere();
 4205     }
 4206 #ifndef PRODUCT
 4207   } else {
 4208     switch (ireg) {
 4209     case Op_VecS:
 4210     case Op_VecD:
 4211     case Op_VecX:
 4212       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4213       break;
 4214     case Op_VecY:
 4215     case Op_VecZ:
 4216       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4217       break;
 4218     default:
 4219       ShouldNotReachHere();
 4220     }
 4221 #endif
 4222   }
 4223 }
 4224 
 4225 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4226                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4227   if (masm) {
 4228     if (is_load) {
 4229       switch (ireg) {
 4230       case Op_VecS:
 4231         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4232         break;
 4233       case Op_VecD:
 4234         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4235         break;
 4236       case Op_VecX:
 4237         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4238           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4239         } else {
 4240           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4241           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4242         }
 4243         break;
 4244       case Op_VecY:
 4245         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4246           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4247         } else {
 4248           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4249           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4250         }
 4251         break;
 4252       case Op_VecZ:
 4253         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4254         break;
 4255       default:
 4256         ShouldNotReachHere();
 4257       }
 4258     } else { // store
 4259       switch (ireg) {
 4260       case Op_VecS:
 4261         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4262         break;
 4263       case Op_VecD:
 4264         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4265         break;
 4266       case Op_VecX:
 4267         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4268           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4269         }
 4270         else {
 4271           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4272         }
 4273         break;
 4274       case Op_VecY:
 4275         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4276           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4277         }
 4278         else {
 4279           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4280         }
 4281         break;
 4282       case Op_VecZ:
 4283         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4284         break;
 4285       default:
 4286         ShouldNotReachHere();
 4287       }
 4288     }
 4289 #ifndef PRODUCT
 4290   } else {
 4291     if (is_load) {
 4292       switch (ireg) {
 4293       case Op_VecS:
 4294         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4295         break;
 4296       case Op_VecD:
 4297         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4298         break;
 4299        case Op_VecX:
 4300         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4301         break;
 4302       case Op_VecY:
 4303       case Op_VecZ:
 4304         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4305         break;
 4306       default:
 4307         ShouldNotReachHere();
 4308       }
 4309     } else { // store
 4310       switch (ireg) {
 4311       case Op_VecS:
 4312         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4313         break;
 4314       case Op_VecD:
 4315         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4316         break;
 4317        case Op_VecX:
 4318         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4319         break;
 4320       case Op_VecY:
 4321       case Op_VecZ:
 4322         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4323         break;
 4324       default:
 4325         ShouldNotReachHere();
 4326       }
 4327     }
 4328 #endif
 4329   }
 4330 }
 4331 
 4332 template <class T>
 4333 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4334   int size = type2aelembytes(bt) * len;
 4335   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4336   for (int i = 0; i < len; i++) {
 4337     int offset = i * type2aelembytes(bt);
 4338     switch (bt) {
 4339       case T_BYTE: val->at(i) = con; break;
 4340       case T_SHORT: {
 4341         jshort c = con;
 4342         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4343         break;
 4344       }
 4345       case T_INT: {
 4346         jint c = con;
 4347         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4348         break;
 4349       }
 4350       case T_LONG: {
 4351         jlong c = con;
 4352         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4353         break;
 4354       }
 4355       case T_FLOAT: {
 4356         jfloat c = con;
 4357         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4358         break;
 4359       }
 4360       case T_DOUBLE: {
 4361         jdouble c = con;
 4362         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4363         break;
 4364       }
 4365       default: assert(false, "%s", type2name(bt));
 4366     }
 4367   }
 4368   return val;
 4369 }
 4370 
 4371 static inline jlong high_bit_set(BasicType bt) {
 4372   switch (bt) {
 4373     case T_BYTE:  return 0x8080808080808080;
 4374     case T_SHORT: return 0x8000800080008000;
 4375     case T_INT:   return 0x8000000080000000;
 4376     case T_LONG:  return 0x8000000000000000;
 4377     default:
 4378       ShouldNotReachHere();
 4379       return 0;
 4380   }
 4381 }
 4382 
 4383 #ifndef PRODUCT
 4384   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4385     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4386   }
 4387 #endif
 4388 
 4389   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4390     __ nop(_count);
 4391   }
 4392 
 4393   uint MachNopNode::size(PhaseRegAlloc*) const {
 4394     return _count;
 4395   }
 4396 
 4397 #ifndef PRODUCT
 4398   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4399     st->print("# breakpoint");
 4400   }
 4401 #endif
 4402 
 4403   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4404     __ int3();
 4405   }
 4406 
 4407   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4408     return MachNode::size(ra_);
 4409   }
 4410 
 4411 %}
 4412 
 4413 //----------ENCODING BLOCK-----------------------------------------------------
 4414 // This block specifies the encoding classes used by the compiler to
 4415 // output byte streams.  Encoding classes are parameterized macros
 4416 // used by Machine Instruction Nodes in order to generate the bit
 4417 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4421 // which returns its register number when queried.  CONST_INTER causes
 4422 // an operand to generate a function which returns the value of the
 4423 // constant when queried.  MEMORY_INTER causes an operand to generate
 4424 // four functions which return the Base Register, the Index Register,
 4425 // the Scale Value, and the Offset Value of the operand when queried.
 4426 // COND_INTER causes an operand to generate six functions which return
 4427 // the encoding code (ie - encoding bits for the instruction)
 4428 // associated with each basic boolean condition for a conditional
 4429 // instruction.
 4430 //
// Instructions specify two basic values for encoding.  (A function is
// also available to check whether a constant displacement is an oop.)
// They use the ins_encode keyword to specify their encoding
 4434 // classes (which must be a sequence of enc_class names, and their
 4435 // parameters, specified in the encoding block), and they use the
 4436 // opcode keyword to specify, in order, their primary, secondary, and
 4437 // tertiary opcode.  Only the opcode sections which a particular
 4438 // instruction needs for encoding need to be specified.
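//
// For example (an illustrative sketch only, not necessarily a rule defined
// in this file), a leaf runtime call instruction could name the encoding
// classes from the block below:
//
//   instruct CallLeafDirect(method meth) %{
//     match(CallLeaf);
//     effect(USE meth);
//     ins_cost(300);
//     format %{ "call_leaf,runtime " %}
//     ins_encode(clear_avx, Java_To_Runtime(meth));
//     ins_pipe(pipe_slow);
//   %}
//
// Here ins_encode lists the enc_class names (clear_avx, Java_To_Runtime)
// whose bodies, defined below, emit the actual instruction bytes.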
 4439 encode %{
 4440   enc_class cdql_enc(no_rax_rdx_RegI div)
 4441   %{
 4442     // Full implementation of Java idiv and irem; checks for
 4443     // special case as described in JVM spec., p.243 & p.271.
 4444     //
 4445     //         normal case                           special case
 4446     //
 4447     // input : rax: dividend                         min_int
 4448     //         reg: divisor                          -1
 4449     //
 4450     // output: rax: quotient  (= rax idiv reg)       min_int
 4451     //         rdx: remainder (= rax irem reg)       0
 4452     //
    //  Code sequence:
 4454     //
 4455     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4456     //    5:   75 07/08                jne    e <normal>
 4457     //    7:   33 d2                   xor    %edx,%edx
 4458     //  [div >= 8 -> offset + 1]
 4459     //  [REX_B]
 4460     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4461     //    c:   74 03/04                je     11 <done>
 4462     // 000000000000000e <normal>:
 4463     //    e:   99                      cltd
 4464     //  [div >= 8 -> offset + 1]
 4465     //  [REX_B]
 4466     //    f:   f7 f9                   idiv   $div
 4467     // 0000000000000011 <done>:
 4468     Label normal;
 4469     Label done;
 4470 
 4471     // cmp    $0x80000000,%eax
 4472     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4473 
 4474     // jne    e <normal>
 4475     __ jccb(Assembler::notEqual, normal);
 4476 
 4477     // xor    %edx,%edx
 4478     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4479 
    // cmp    $0xffffffffffffffff,$div
 4481     __ cmpl($div$$Register, -1);
 4482 
 4483     // je     11 <done>
 4484     __ jccb(Assembler::equal, done);
 4485 
 4486     // <normal>
 4487     // cltd
 4488     __ bind(normal);
 4489     __ cdql();
 4490 
 4491     // idivl
 4492     // <done>
 4493     __ idivl($div$$Register);
 4494     __ bind(done);
 4495   %}
 4496 
 4497   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4498   %{
 4499     // Full implementation of Java ldiv and lrem; checks for
 4500     // special case as described in JVM spec., p.243 & p.271.
 4501     //
 4502     //         normal case                           special case
 4503     //
 4504     // input : rax: dividend                         min_long
 4505     //         reg: divisor                          -1
 4506     //
 4507     // output: rax: quotient  (= rax idiv reg)       min_long
 4508     //         rdx: remainder (= rax irem reg)       0
 4509     //
    //  Code sequence:
 4511     //
 4512     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4513     //    7:   00 00 80
 4514     //    a:   48 39 d0                cmp    %rdx,%rax
 4515     //    d:   75 08                   jne    17 <normal>
 4516     //    f:   33 d2                   xor    %edx,%edx
 4517     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4518     //   15:   74 05                   je     1c <done>
 4519     // 0000000000000017 <normal>:
 4520     //   17:   48 99                   cqto
 4521     //   19:   48 f7 f9                idiv   $div
 4522     // 000000000000001c <done>:
 4523     Label normal;
 4524     Label done;
 4525 
 4526     // mov    $0x8000000000000000,%rdx
 4527     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4528 
 4529     // cmp    %rdx,%rax
 4530     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4531 
 4532     // jne    17 <normal>
 4533     __ jccb(Assembler::notEqual, normal);
 4534 
 4535     // xor    %edx,%edx
 4536     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4537 
 4538     // cmp    $0xffffffffffffffff,$div
 4539     __ cmpq($div$$Register, -1);
 4540 
    // je     1c <done>
 4542     __ jccb(Assembler::equal, done);
 4543 
 4544     // <normal>
 4545     // cqto
 4546     __ bind(normal);
 4547     __ cdqq();
 4548 
    // idivq
 4550     // <done>
 4551     __ idivq($div$$Register);
 4552     __ bind(done);
 4553   %}
 4554 
 4555   enc_class clear_avx %{
 4556     DEBUG_ONLY(int off0 = __ offset());
 4557     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code uses
      // wide vectors, to avoid the AVX <-> SSE transition penalty during calls.
 4561       __ vzeroupper();
 4562     }
 4563     DEBUG_ONLY(int off1 = __ offset());
 4564     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4565   %}
 4566 
 4567   enc_class Java_To_Runtime(method meth) %{
 4568     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4569     __ call(r10);
 4570     __ post_call_nop();
 4571   %}
 4572 
 4573   enc_class Java_Static_Call(method meth)
 4574   %{
 4575     // JAVA STATIC CALL
 4576     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4577     // determine who we intended to call.
 4578     if (!_method) {
 4579       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4580     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4581       // The NOP here is purely to ensure that eliding a call to
 4582       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4583       __ nop(5);
 4584       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4585     } else {
 4586       int method_index = resolved_method_index(masm);
 4587       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4588                                                   : static_call_Relocation::spec(method_index);
 4589       address mark = __ pc();
 4590       int call_offset = __ offset();
 4591       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4592       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4593         // Calls of the same statically bound method can share
 4594         // a stub to the interpreter.
 4595         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4596       } else {
 4597         // Emit stubs for static call.
 4598         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4599         __ clear_inst_mark();
 4600         if (stub == nullptr) {
 4601           ciEnv::current()->record_failure("CodeCache is full");
 4602           return;
 4603         }
 4604       }
 4605     }
 4606     __ post_call_nop();
 4607   %}
 4608 
 4609   enc_class Java_Dynamic_Call(method meth) %{
 4610     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4611     __ post_call_nop();
 4612   %}
 4613 
 4614   enc_class call_epilog %{
 4615     if (VerifyStackAtCalls) {
 4616       // Check that stack depth is unchanged: find majik cookie on stack
 4617       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4618       Label L;
 4619       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4620       __ jccb(Assembler::equal, L);
 4621       // Die if stack mismatch
 4622       __ int3();
 4623       __ bind(L);
 4624     }
 4625     if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
 4626       // The last return value is not set by the callee but used to pass the null marker to compiled code.
 4627       // Search for the corresponding projection, get the register and emit code that initialized it.
 4628       uint con = (tf()->range_cc()->cnt() - 1);
 4629       for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
 4630         ProjNode* proj = fast_out(i)->as_Proj();
 4631         if (proj->_con == con) {
 4632           // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
 4633           OptoReg::Name optoReg = ra_->get_reg_first(proj);
 4634           VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
 4635           Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
 4636           __ testq(rax, rax);
 4637           __ setb(Assembler::notZero, toReg);
 4638           __ movzbl(toReg, toReg);
 4639           if (reg->is_stack()) {
 4640             int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
 4641             __ movq(Address(rsp, st_off), toReg);
 4642           }
 4643           break;
 4644         }
 4645       }
 4646       if (return_value_is_used()) {
 4647         // An inline type is returned as fields in multiple registers.
 4648         // Rax either contains an oop if the inline type is buffered or a pointer
 4649         // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
 4650         // if the lowest bit is set to allow C2 to use the oop after null checking.
 4651         // rax &= (rax & 1) - 1
 4652         __ movptr(rscratch1, rax);
 4653         __ andptr(rscratch1, 0x1);
 4654         __ subptr(rscratch1, 0x1);
 4655         __ andptr(rax, rscratch1);
 4656       }
 4657     }
 4658   %}
 4659 
 4660 %}
 4661 
 4662 //----------FRAME--------------------------------------------------------------
 4663 // Definition of frame structure and management information.
 4664 //
 4665 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4666 //                             |   (to get allocators register number
 4667 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4668 //  r   CALLER     |        |
 4669 //  o     |        +--------+      pad to even-align allocators stack-slot
 4670 //  w     V        |  pad0  |        numbers; owned by CALLER
 4671 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4672 //  h     ^        |   in   |  5
 4673 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4674 //  |     |        |        |  3
 4675 //  |     |        +--------+
 4676 //  V     |        | old out|      Empty on Intel, window on Sparc
 4677 //        |    old |preserve|      Must be even aligned.
 4678 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4679 //        |        |   in   |  3   area for Intel ret address
 4680 //     Owned by    |preserve|      Empty on Sparc.
 4681 //       SELF      +--------+
 4682 //        |        |  pad2  |  2   pad to align old SP
 4683 //        |        +--------+  1
 4684 //        |        | locks  |  0
 4685 //        |        +--------+----> OptoReg::stack0(), even aligned
 4686 //        |        |  pad1  | 11   pad to align new SP
 4687 //        |        +--------+
 4688 //        |        |        | 10
 4689 //        |        | spills |  9   spills
 4690 //        V        |        |  8   (pad0 slot for callee)
 4691 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4692 //        ^        |  out   |  7
 4693 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4694 //     Owned by    +--------+
 4695 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4696 //        |    new |preserve|      Must be even-aligned.
 4697 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4698 //        |        |        |
 4699 //
 4700 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4701 //         known from SELF's arguments and the Java calling convention.
 4702 //         Region 6-7 is determined per call site.
 4703 // Note 2: If the calling convention leaves holes in the incoming argument
 4704 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4705 //         are owned by the CALLEE.  Holes should not be necessary in the
 4706 //         incoming area, as the Java calling convention is completely under
 4707 //         the control of the AD file.  Doubles can be sorted and packed to
 4708 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4709 //         varargs C calling conventions.
 4710 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4711 //         even aligned with pad0 as needed.
 4712 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4713 //         region 6-11 is even aligned; it may be padded out more so that
 4714 //         the region from SP to FP meets the minimum stack alignment.
 4715 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4716 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4717 //         SP meets the minimum alignment.
 4718 
 4719 frame
 4720 %{
 4721   // These three registers define part of the calling convention
 4722   // between compiled code and the interpreter.
 4723   inline_cache_reg(RAX);                // Inline Cache Register
 4724 
 4725   // Optional: name the operand used by cisc-spilling to access
 4726   // [stack_pointer + offset]
 4727   cisc_spilling_operand_name(indOffset32);
 4728 
 4729   // Number of stack slots consumed by locking an object
 4730   sync_stack_slots(2);
 4731 
 4732   // Compiled code's Frame Pointer
 4733   frame_pointer(RSP);
 4734 
 4735   // Stack alignment requirement
 4736   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4737 
 4738   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4739   // for calls to C.  Supports the var-args backing area for register parms.
 4740   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4741 
 4742   // The after-PROLOG location of the return address.  Location of
 4743   // return address specifies a type (REG or STACK) and a number
 4744   // representing the register number (i.e. - use a register name) or
 4745   // stack slot.
 4746   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4747   // Otherwise, it is above the locks and verification slot and alignment word
 4748   return_addr(STACK - 2 +
 4749               align_up((Compile::current()->in_preserve_stack_slots() +
 4750                         Compile::current()->fixed_slots()),
 4751                        stack_alignment_in_slots()));
 4752 
 4753   // Location of compiled Java return values.  Same as C for now.
 4754   return_value
 4755   %{
 4756     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4757            "only return normal values");
 4758 
 4759     static const int lo[Op_RegL + 1] = {
 4760       0,
 4761       0,
 4762       RAX_num,  // Op_RegN
 4763       RAX_num,  // Op_RegI
 4764       RAX_num,  // Op_RegP
 4765       XMM0_num, // Op_RegF
 4766       XMM0_num, // Op_RegD
 4767       RAX_num   // Op_RegL
 4768     };
 4769     static const int hi[Op_RegL + 1] = {
 4770       0,
 4771       0,
 4772       OptoReg::Bad, // Op_RegN
 4773       OptoReg::Bad, // Op_RegI
 4774       RAX_H_num,    // Op_RegP
 4775       OptoReg::Bad, // Op_RegF
 4776       XMM0b_num,    // Op_RegD
 4777       RAX_H_num     // Op_RegL
 4778     };
 4779     // Excluded flags and vector registers.
 4780     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4781     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4782   %}
 4783 %}
 4784 
 4785 //----------ATTRIBUTES---------------------------------------------------------
 4786 //----------Operand Attributes-------------------------------------------------
 4787 op_attrib op_cost(0);        // Required cost attribute
 4788 
 4789 //----------Instruction Attributes---------------------------------------------
 4790 ins_attrib ins_cost(100);       // Required cost attribute
 4791 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4792 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4793                                 // a non-matching short branch variant
 4794                                 // of some long branch?
 4795 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4796                                 // be a power of 2) specifies the
 4797                                 // alignment that some part of the
 4798                                 // instruction (not necessarily the
 4799                                 // start) requires.  If > 1, a
 4800                                 // compute_padding() function must be
 4801                                 // provided for the instruction
 4802 
 4803 // Whether this node is expanded during code emission into a sequence of
 4804 // instructions and the first instruction can perform an implicit null check.
 4805 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4806 
 4807 //----------OPERANDS-----------------------------------------------------------
 4808 // Operand definitions must precede instruction definitions for correct parsing
 4809 // in the ADLC because operands constitute user defined types which are used in
 4810 // instruction definitions.
 4811 
 4812 //----------Simple Operands----------------------------------------------------
 4813 // Immediate Operands
 4814 // Integer Immediate
 4815 operand immI()
 4816 %{
 4817   match(ConI);
 4818 
 4819   op_cost(10);
 4820   format %{ %}
 4821   interface(CONST_INTER);
 4822 %}
 4823 
 4824 // Constant for test vs zero
 4825 operand immI_0()
 4826 %{
 4827   predicate(n->get_int() == 0);
 4828   match(ConI);
 4829 
 4830   op_cost(0);
 4831   format %{ %}
 4832   interface(CONST_INTER);
 4833 %}
 4834 
 4835 // Constant for increment
 4836 operand immI_1()
 4837 %{
 4838   predicate(n->get_int() == 1);
 4839   match(ConI);
 4840 
 4841   op_cost(0);
 4842   format %{ %}
 4843   interface(CONST_INTER);
 4844 %}
 4845 
 4846 // Constant for decrement
 4847 operand immI_M1()
 4848 %{
 4849   predicate(n->get_int() == -1);
 4850   match(ConI);
 4851 
 4852   op_cost(0);
 4853   format %{ %}
 4854   interface(CONST_INTER);
 4855 %}
 4856 
 4857 operand immI_2()
 4858 %{
 4859   predicate(n->get_int() == 2);
 4860   match(ConI);
 4861 
 4862   op_cost(0);
 4863   format %{ %}
 4864   interface(CONST_INTER);
 4865 %}
 4866 
 4867 operand immI_4()
 4868 %{
 4869   predicate(n->get_int() == 4);
 4870   match(ConI);
 4871 
 4872   op_cost(0);
 4873   format %{ %}
 4874   interface(CONST_INTER);
 4875 %}
 4876 
 4877 operand immI_8()
 4878 %{
 4879   predicate(n->get_int() == 8);
 4880   match(ConI);
 4881 
 4882   op_cost(0);
 4883   format %{ %}
 4884   interface(CONST_INTER);
 4885 %}
 4886 
 4887 // Valid scale values for addressing modes
 4888 operand immI2()
 4889 %{
 4890   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4891   match(ConI);
 4892 
 4893   format %{ %}
 4894   interface(CONST_INTER);
 4895 %}
 4896 
 4897 operand immU7()
 4898 %{
 4899   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4900   match(ConI);
 4901 
 4902   op_cost(5);
 4903   format %{ %}
 4904   interface(CONST_INTER);
 4905 %}
 4906 
 4907 operand immI8()
 4908 %{
 4909   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4910   match(ConI);
 4911 
 4912   op_cost(5);
 4913   format %{ %}
 4914   interface(CONST_INTER);
 4915 %}
 4916 
 4917 operand immU8()
 4918 %{
 4919   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4920   match(ConI);
 4921 
 4922   op_cost(5);
 4923   format %{ %}
 4924   interface(CONST_INTER);
 4925 %}
 4926 
 4927 operand immI16()
 4928 %{
 4929   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4930   match(ConI);
 4931 
 4932   op_cost(10);
 4933   format %{ %}
 4934   interface(CONST_INTER);
 4935 %}
 4936 
 4937 // Int Immediate non-negative
 4938 operand immU31()
 4939 %{
 4940   predicate(n->get_int() >= 0);
 4941   match(ConI);
 4942 
 4943   op_cost(0);
 4944   format %{ %}
 4945   interface(CONST_INTER);
 4946 %}
 4947 
 4948 // Pointer Immediate
 4949 operand immP()
 4950 %{
 4951   match(ConP);
 4952 
 4953   op_cost(10);
 4954   format %{ %}
 4955   interface(CONST_INTER);
 4956 %}
 4957 
 4958 // Null Pointer Immediate
 4959 operand immP0()
 4960 %{
 4961   predicate(n->get_ptr() == 0);
 4962   match(ConP);
 4963 
 4964   op_cost(5);
 4965   format %{ %}
 4966   interface(CONST_INTER);
 4967 %}
 4968 
 4969 // Pointer Immediate
 4970 operand immN() %{
 4971   match(ConN);
 4972 
 4973   op_cost(10);
 4974   format %{ %}
 4975   interface(CONST_INTER);
 4976 %}
 4977 
 4978 operand immNKlass() %{
 4979   match(ConNKlass);
 4980 
 4981   op_cost(10);
 4982   format %{ %}
 4983   interface(CONST_INTER);
 4984 %}
 4985 
 4986 // Null Pointer Immediate
 4987 operand immN0() %{
 4988   predicate(n->get_narrowcon() == 0);
 4989   match(ConN);
 4990 
 4991   op_cost(5);
 4992   format %{ %}
 4993   interface(CONST_INTER);
 4994 %}
 4995 
 4996 operand immP31()
 4997 %{
 4998   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4999             && (n->get_ptr() >> 31) == 0);
 5000   match(ConP);
 5001 
 5002   op_cost(5);
 5003   format %{ %}
 5004   interface(CONST_INTER);
 5005 %}
 5006 
 5007 
 5008 // Long Immediate
 5009 operand immL()
 5010 %{
 5011   match(ConL);
 5012 
 5013   op_cost(20);
 5014   format %{ %}
 5015   interface(CONST_INTER);
 5016 %}
 5017 
 5018 // Long Immediate 8-bit
 5019 operand immL8()
 5020 %{
 5021   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 5022   match(ConL);
 5023 
 5024   op_cost(5);
 5025   format %{ %}
 5026   interface(CONST_INTER);
 5027 %}
 5028 
 5029 // Long Immediate 32-bit unsigned
 5030 operand immUL32()
 5031 %{
 5032   predicate(n->get_long() == (unsigned int) (n->get_long()));
 5033   match(ConL);
 5034 
 5035   op_cost(10);
 5036   format %{ %}
 5037   interface(CONST_INTER);
 5038 %}
 5039 
 5040 // Long Immediate 32-bit signed
 5041 operand immL32()
 5042 %{
 5043   predicate(n->get_long() == (int) (n->get_long()));
 5044   match(ConL);
 5045 
 5046   op_cost(15);
 5047   format %{ %}
 5048   interface(CONST_INTER);
 5049 %}
 5050 
 5051 operand immL_Pow2()
 5052 %{
 5053   predicate(is_power_of_2((julong)n->get_long()));
 5054   match(ConL);
 5055 
 5056   op_cost(15);
 5057   format %{ %}
 5058   interface(CONST_INTER);
 5059 %}
 5060 
 5061 operand immL_NotPow2()
 5062 %{
 5063   predicate(is_power_of_2((julong)~n->get_long()));
 5064   match(ConL);
 5065 
 5066   op_cost(15);
 5067   format %{ %}
 5068   interface(CONST_INTER);
 5069 %}
 5070 
 5071 // Long Immediate zero
 5072 operand immL0()
 5073 %{
 5074   predicate(n->get_long() == 0L);
 5075   match(ConL);
 5076 
 5077   op_cost(10);
 5078   format %{ %}
 5079   interface(CONST_INTER);
 5080 %}
 5081 
 5082 // Constant for increment
 5083 operand immL1()
 5084 %{
 5085   predicate(n->get_long() == 1);
 5086   match(ConL);
 5087 
 5088   format %{ %}
 5089   interface(CONST_INTER);
 5090 %}
 5091 
 5092 // Constant for decrement
 5093 operand immL_M1()
 5094 %{
 5095   predicate(n->get_long() == -1);
 5096   match(ConL);
 5097 
 5098   format %{ %}
 5099   interface(CONST_INTER);
 5100 %}
 5101 
 5102 // Long Immediate: low 32-bit mask
 5103 operand immL_32bits()
 5104 %{
 5105   predicate(n->get_long() == 0xFFFFFFFFL);
 5106   match(ConL);
 5107   op_cost(20);
 5108 
 5109   format %{ %}
 5110   interface(CONST_INTER);
 5111 %}
 5112 
 5113 // Int Immediate: 2^n-1, positive
 5114 operand immI_Pow2M1()
 5115 %{
 5116   predicate((n->get_int() > 0)
 5117             && is_power_of_2((juint)n->get_int() + 1));
 5118   match(ConI);
 5119 
 5120   op_cost(20);
 5121   format %{ %}
 5122   interface(CONST_INTER);
 5123 %}
 5124 
 5125 // Float Immediate zero
 5126 operand immF0()
 5127 %{
 5128   predicate(jint_cast(n->getf()) == 0);
 5129   match(ConF);
 5130 
 5131   op_cost(5);
 5132   format %{ %}
 5133   interface(CONST_INTER);
 5134 %}
 5135 
 5136 // Float Immediate
 5137 operand immF()
 5138 %{
 5139   match(ConF);
 5140 
 5141   op_cost(15);
 5142   format %{ %}
 5143   interface(CONST_INTER);
 5144 %}
 5145 
 5146 // Half Float Immediate
 5147 operand immH()
 5148 %{
 5149   match(ConH);
 5150 
 5151   op_cost(15);
 5152   format %{ %}
 5153   interface(CONST_INTER);
 5154 %}
 5155 
 5156 // Double Immediate zero
 5157 operand immD0()
 5158 %{
 5159   predicate(jlong_cast(n->getd()) == 0);
 5160   match(ConD);
 5161 
 5162   op_cost(5);
 5163   format %{ %}
 5164   interface(CONST_INTER);
 5165 %}
 5166 
 5167 // Double Immediate
 5168 operand immD()
 5169 %{
 5170   match(ConD);
 5171 
 5172   op_cost(15);
 5173   format %{ %}
 5174   interface(CONST_INTER);
 5175 %}
 5176 
 5177 // Immediates for special shifts (sign extend)
 5178 
// Constants for shift counts
 5180 operand immI_16()
 5181 %{
 5182   predicate(n->get_int() == 16);
 5183   match(ConI);
 5184 
 5185   format %{ %}
 5186   interface(CONST_INTER);
 5187 %}
 5188 
 5189 operand immI_24()
 5190 %{
 5191   predicate(n->get_int() == 24);
 5192   match(ConI);
 5193 
 5194   format %{ %}
 5195   interface(CONST_INTER);
 5196 %}
 5197 
 5198 // Constant for byte-wide masking
 5199 operand immI_255()
 5200 %{
 5201   predicate(n->get_int() == 255);
 5202   match(ConI);
 5203 
 5204   format %{ %}
 5205   interface(CONST_INTER);
 5206 %}
 5207 
 5208 // Constant for short-wide masking
 5209 operand immI_65535()
 5210 %{
 5211   predicate(n->get_int() == 65535);
 5212   match(ConI);
 5213 
 5214   format %{ %}
 5215   interface(CONST_INTER);
 5216 %}
 5217 
 5218 // Constant for byte-wide masking
 5219 operand immL_255()
 5220 %{
 5221   predicate(n->get_long() == 255);
 5222   match(ConL);
 5223 
 5224   format %{ %}
 5225   interface(CONST_INTER);
 5226 %}
 5227 
 5228 // Constant for short-wide masking
 5229 operand immL_65535()
 5230 %{
 5231   predicate(n->get_long() == 65535);
 5232   match(ConL);
 5233 
 5234   format %{ %}
 5235   interface(CONST_INTER);
 5236 %}
 5237 
 5238 // AOT Runtime Constants Address
 5239 operand immAOTRuntimeConstantsAddress()
 5240 %{
 5241   // Check if the address is in the range of AOT Runtime Constants
 5242   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5243   match(ConP);
 5244 
 5245   op_cost(0);
 5246   format %{ %}
 5247   interface(CONST_INTER);
 5248 %}
 5249 
 5250 operand kReg()
 5251 %{
 5252   constraint(ALLOC_IN_RC(vectmask_reg));
 5253   match(RegVectMask);
 5254   format %{%}
 5255   interface(REG_INTER);
 5256 %}
 5257 
 5258 // Register Operands
 5259 // Integer Register
 5260 operand rRegI()
 5261 %{
 5262   constraint(ALLOC_IN_RC(int_reg));
 5263   match(RegI);
 5264 
 5265   match(rax_RegI);
 5266   match(rbx_RegI);
 5267   match(rcx_RegI);
 5268   match(rdx_RegI);
 5269   match(rdi_RegI);
 5270 
 5271   format %{ %}
 5272   interface(REG_INTER);
 5273 %}
 5274 
 5275 // Special Registers
 5276 operand rax_RegI()
 5277 %{
 5278   constraint(ALLOC_IN_RC(int_rax_reg));
 5279   match(RegI);
 5280   match(rRegI);
 5281 
 5282   format %{ "RAX" %}
 5283   interface(REG_INTER);
 5284 %}
 5285 
 5286 // Special Registers
 5287 operand rbx_RegI()
 5288 %{
 5289   constraint(ALLOC_IN_RC(int_rbx_reg));
 5290   match(RegI);
 5291   match(rRegI);
 5292 
 5293   format %{ "RBX" %}
 5294   interface(REG_INTER);
 5295 %}
 5296 
 5297 operand rcx_RegI()
 5298 %{
 5299   constraint(ALLOC_IN_RC(int_rcx_reg));
 5300   match(RegI);
 5301   match(rRegI);
 5302 
 5303   format %{ "RCX" %}
 5304   interface(REG_INTER);
 5305 %}
 5306 
 5307 operand rdx_RegI()
 5308 %{
 5309   constraint(ALLOC_IN_RC(int_rdx_reg));
 5310   match(RegI);
 5311   match(rRegI);
 5312 
 5313   format %{ "RDX" %}
 5314   interface(REG_INTER);
 5315 %}
 5316 
 5317 operand rdi_RegI()
 5318 %{
 5319   constraint(ALLOC_IN_RC(int_rdi_reg));
 5320   match(RegI);
 5321   match(rRegI);
 5322 
 5323   format %{ "RDI" %}
 5324   interface(REG_INTER);
 5325 %}
 5326 
 5327 operand no_rax_rdx_RegI()
 5328 %{
 5329   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5330   match(RegI);
 5331   match(rbx_RegI);
 5332   match(rcx_RegI);
 5333   match(rdi_RegI);
 5334 
 5335   format %{ %}
 5336   interface(REG_INTER);
 5337 %}
 5338 
 5339 operand no_rbp_r13_RegI()
 5340 %{
 5341   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5342   match(RegI);
 5343   match(rRegI);
 5344   match(rax_RegI);
 5345   match(rbx_RegI);
 5346   match(rcx_RegI);
 5347   match(rdx_RegI);
 5348   match(rdi_RegI);
 5349 
 5350   format %{ %}
 5351   interface(REG_INTER);
 5352 %}
 5353 
 5354 // Pointer Register
 5355 operand any_RegP()
 5356 %{
 5357   constraint(ALLOC_IN_RC(any_reg));
 5358   match(RegP);
 5359   match(rax_RegP);
 5360   match(rbx_RegP);
 5361   match(rdi_RegP);
 5362   match(rsi_RegP);
 5363   match(rbp_RegP);
 5364   match(r15_RegP);
 5365   match(rRegP);
 5366 
 5367   format %{ %}
 5368   interface(REG_INTER);
 5369 %}
 5370 
 5371 operand rRegP()
 5372 %{
 5373   constraint(ALLOC_IN_RC(ptr_reg));
 5374   match(RegP);
 5375   match(rax_RegP);
 5376   match(rbx_RegP);
 5377   match(rdi_RegP);
 5378   match(rsi_RegP);
 5379   match(rbp_RegP);  // See Q&A below about
 5380   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5381 
 5382   format %{ %}
 5383   interface(REG_INTER);
 5384 %}
 5385 
 5386 operand rRegN() %{
 5387   constraint(ALLOC_IN_RC(int_reg));
 5388   match(RegN);
 5389 
 5390   format %{ %}
 5391   interface(REG_INTER);
 5392 %}
 5393 
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator when the result register is chosen.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.
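//
// Illustrative sketch only (commented out, not a live rule): for the allocator
// to actually hand out R15, an instruction must name r15_RegP (or any_RegP)
// explicitly as its output, along the lines of this hypothetical rule:
//
//   instruct tlsLoadP_example(r15_RegP dst)
//   %{
//     match(Set dst (ThreadLocal));
//     ins_cost(0);
//     format %{ "# TLS is in R15" %}
//     ins_encode( /* empty encoding: R15 already holds the thread */ );
//     ins_pipe(ialu_reg_reg);
//   %}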
 5404 
 5405 // This operand is not allowed to use RBP even if
 5406 // RBP is not used to hold the frame pointer.
 5407 operand no_rbp_RegP()
 5408 %{
 5409   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5410   match(RegP);
 5411   match(rbx_RegP);
 5412   match(rsi_RegP);
 5413   match(rdi_RegP);
 5414 
 5415   format %{ %}
 5416   interface(REG_INTER);
 5417 %}
 5418 
 5419 // Special Registers
 5420 // Return a pointer value
 5421 operand rax_RegP()
 5422 %{
 5423   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5424   match(RegP);
 5425   match(rRegP);
 5426 
 5427   format %{ %}
 5428   interface(REG_INTER);
 5429 %}
 5430 
 5431 // Special Registers
 5432 // Return a compressed pointer value
 5433 operand rax_RegN()
 5434 %{
 5435   constraint(ALLOC_IN_RC(int_rax_reg));
 5436   match(RegN);
 5437   match(rRegN);
 5438 
 5439   format %{ %}
 5440   interface(REG_INTER);
 5441 %}
 5442 
 5443 // Used in AtomicAdd
 5444 operand rbx_RegP()
 5445 %{
 5446   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5447   match(RegP);
 5448   match(rRegP);
 5449 
 5450   format %{ %}
 5451   interface(REG_INTER);
 5452 %}
 5453 
 5454 operand rsi_RegP()
 5455 %{
 5456   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5457   match(RegP);
 5458   match(rRegP);
 5459 
 5460   format %{ %}
 5461   interface(REG_INTER);
 5462 %}
 5463 
 5464 operand rbp_RegP()
 5465 %{
 5466   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5467   match(RegP);
 5468   match(rRegP);
 5469 
 5470   format %{ %}
 5471   interface(REG_INTER);
 5472 %}
 5473 
 5474 // Used in rep stosq
 5475 operand rdi_RegP()
 5476 %{
 5477   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5478   match(RegP);
 5479   match(rRegP);
 5480 
 5481   format %{ %}
 5482   interface(REG_INTER);
 5483 %}
 5484 
 5485 operand r15_RegP()
 5486 %{
 5487   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5488   match(RegP);
 5489   match(rRegP);
 5490 
 5491   format %{ %}
 5492   interface(REG_INTER);
 5493 %}
 5494 
 5495 operand rRegL()
 5496 %{
 5497   constraint(ALLOC_IN_RC(long_reg));
 5498   match(RegL);
 5499   match(rax_RegL);
 5500   match(rdx_RegL);
 5501 
 5502   format %{ %}
 5503   interface(REG_INTER);
 5504 %}
 5505 
 5506 // Special Registers
 5507 operand no_rax_rdx_RegL()
 5508 %{
 5509   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5510   match(RegL);
 5511   match(rRegL);
 5512 
 5513   format %{ %}
 5514   interface(REG_INTER);
 5515 %}
 5516 
 5517 operand rax_RegL()
 5518 %{
 5519   constraint(ALLOC_IN_RC(long_rax_reg));
 5520   match(RegL);
 5521   match(rRegL);
 5522 
 5523   format %{ "RAX" %}
 5524   interface(REG_INTER);
 5525 %}
 5526 
 5527 operand rcx_RegL()
 5528 %{
 5529   constraint(ALLOC_IN_RC(long_rcx_reg));
 5530   match(RegL);
 5531   match(rRegL);
 5532 
 5533   format %{ %}
 5534   interface(REG_INTER);
 5535 %}
 5536 
 5537 operand rdx_RegL()
 5538 %{
 5539   constraint(ALLOC_IN_RC(long_rdx_reg));
 5540   match(RegL);
 5541   match(rRegL);
 5542 
 5543   format %{ %}
 5544   interface(REG_INTER);
 5545 %}
 5546 
 5547 operand r11_RegL()
 5548 %{
 5549   constraint(ALLOC_IN_RC(long_r11_reg));
 5550   match(RegL);
 5551   match(rRegL);
 5552 
 5553   format %{ %}
 5554   interface(REG_INTER);
 5555 %}
 5556 
 5557 operand no_rbp_r13_RegL()
 5558 %{
 5559   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5560   match(RegL);
 5561   match(rRegL);
 5562   match(rax_RegL);
 5563   match(rcx_RegL);
 5564   match(rdx_RegL);
 5565 
 5566   format %{ %}
 5567   interface(REG_INTER);
 5568 %}
 5569 
 5570 // Flags register, used as output of compare instructions
 5571 operand rFlagsReg()
 5572 %{
 5573   constraint(ALLOC_IN_RC(int_flags));
 5574   match(RegFlags);
 5575 
 5576   format %{ "RFLAGS" %}
 5577   interface(REG_INTER);
 5578 %}
 5579 
 5580 // Flags register, used as output of FLOATING POINT compare instructions
 5581 operand rFlagsRegU()
 5582 %{
 5583   constraint(ALLOC_IN_RC(int_flags));
 5584   match(RegFlags);
 5585 
 5586   format %{ "RFLAGS_U" %}
 5587   interface(REG_INTER);
 5588 %}
 5589 
 5590 operand rFlagsRegUCF() %{
 5591   constraint(ALLOC_IN_RC(int_flags));
 5592   match(RegFlags);
 5593   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5594 
 5595   format %{ "RFLAGS_U_CF" %}
 5596   interface(REG_INTER);
 5597 %}
 5598 
 5599 operand rFlagsRegUCFE() %{
 5600   constraint(ALLOC_IN_RC(int_flags));
 5601   match(RegFlags);
 5602   predicate(UseAPX && VM_Version::supports_avx10_2());
 5603 
 5604   format %{ "RFLAGS_U_CFE" %}
 5605   interface(REG_INTER);
 5606 %}
 5607 
 5608 // Float register operands
 5609 operand regF() %{
 5610    constraint(ALLOC_IN_RC(float_reg));
 5611    match(RegF);
 5612 
 5613    format %{ %}
 5614    interface(REG_INTER);
 5615 %}
 5616 
 5617 // Float register operands
 5618 operand legRegF() %{
 5619    constraint(ALLOC_IN_RC(float_reg_legacy));
 5620    match(RegF);
 5621 
 5622    format %{ %}
 5623    interface(REG_INTER);
 5624 %}
 5625 
 5626 // Float register operands
 5627 operand vlRegF() %{
 5628    constraint(ALLOC_IN_RC(float_reg_vl));
 5629    match(RegF);
 5630 
 5631    format %{ %}
 5632    interface(REG_INTER);
 5633 %}
 5634 
 5635 // Double register operands
 5636 operand regD() %{
 5637    constraint(ALLOC_IN_RC(double_reg));
 5638    match(RegD);
 5639 
 5640    format %{ %}
 5641    interface(REG_INTER);
 5642 %}
 5643 
 5644 // Double register operands
 5645 operand legRegD() %{
 5646    constraint(ALLOC_IN_RC(double_reg_legacy));
 5647    match(RegD);
 5648 
 5649    format %{ %}
 5650    interface(REG_INTER);
 5651 %}
 5652 
 5653 // Double register operands
 5654 operand vlRegD() %{
 5655    constraint(ALLOC_IN_RC(double_reg_vl));
 5656    match(RegD);
 5657 
 5658    format %{ %}
 5659    interface(REG_INTER);
 5660 %}
 5661 
 5662 //----------Memory Operands----------------------------------------------------
 5663 // Direct Memory Operand
 5664 // operand direct(immP addr)
 5665 // %{
 5666 //   match(addr);
 5667 
 5668 //   format %{ "[$addr]" %}
 5669 //   interface(MEMORY_INTER) %{
 5670 //     base(0xFFFFFFFF);
 5671 //     index(0x4);
 5672 //     scale(0x0);
 5673 //     disp($addr);
 5674 //   %}
 5675 // %}
 5676 
 5677 // Indirect Memory Operand
 5678 operand indirect(any_RegP reg)
 5679 %{
 5680   constraint(ALLOC_IN_RC(ptr_reg));
 5681   match(reg);
 5682 
 5683   format %{ "[$reg]" %}
 5684   interface(MEMORY_INTER) %{
 5685     base($reg);
 5686     index(0x4);
 5687     scale(0x0);
 5688     disp(0x0);
 5689   %}
 5690 %}
 5691 
 5692 // Indirect Memory Plus Short Offset Operand
 5693 operand indOffset8(any_RegP reg, immL8 off)
 5694 %{
 5695   constraint(ALLOC_IN_RC(ptr_reg));
 5696   match(AddP reg off);
 5697 
 5698   format %{ "[$reg + $off (8-bit)]" %}
 5699   interface(MEMORY_INTER) %{
 5700     base($reg);
 5701     index(0x4);
 5702     scale(0x0);
 5703     disp($off);
 5704   %}
 5705 %}
 5706 
 5707 // Indirect Memory Plus Long Offset Operand
 5708 operand indOffset32(any_RegP reg, immL32 off)
 5709 %{
 5710   constraint(ALLOC_IN_RC(ptr_reg));
 5711   match(AddP reg off);
 5712 
 5713   format %{ "[$reg + $off (32-bit)]" %}
 5714   interface(MEMORY_INTER) %{
 5715     base($reg);
 5716     index(0x4);
 5717     scale(0x0);
 5718     disp($off);
 5719   %}
 5720 %}
 5721 
 5722 // Indirect Memory Plus Index Register Plus Offset Operand
 5723 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5724 %{
 5725   constraint(ALLOC_IN_RC(ptr_reg));
 5726   match(AddP (AddP reg lreg) off);
 5727 
 5728   op_cost(10);
 5729   format %{"[$reg + $off + $lreg]" %}
 5730   interface(MEMORY_INTER) %{
 5731     base($reg);
 5732     index($lreg);
 5733     scale(0x0);
 5734     disp($off);
 5735   %}
 5736 %}
 5737 
// Indirect Memory Plus Index Register Operand
 5739 operand indIndex(any_RegP reg, rRegL lreg)
 5740 %{
 5741   constraint(ALLOC_IN_RC(ptr_reg));
 5742   match(AddP reg lreg);
 5743 
 5744   op_cost(10);
 5745   format %{"[$reg + $lreg]" %}
 5746   interface(MEMORY_INTER) %{
 5747     base($reg);
 5748     index($lreg);
 5749     scale(0x0);
 5750     disp(0x0);
 5751   %}
 5752 %}
 5753 
 5754 // Indirect Memory Times Scale Plus Index Register
 5755 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5756 %{
 5757   constraint(ALLOC_IN_RC(ptr_reg));
 5758   match(AddP reg (LShiftL lreg scale));
 5759 
 5760   op_cost(10);
 5761   format %{"[$reg + $lreg << $scale]" %}
 5762   interface(MEMORY_INTER) %{
 5763     base($reg);
 5764     index($lreg);
 5765     scale($scale);
 5766     disp(0x0);
 5767   %}
 5768 %}
 5769 
 5770 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5771 %{
 5772   constraint(ALLOC_IN_RC(ptr_reg));
 5773   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5774   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5775 
 5776   op_cost(10);
 5777   format %{"[$reg + pos $idx << $scale]" %}
 5778   interface(MEMORY_INTER) %{
 5779     base($reg);
 5780     index($idx);
 5781     scale($scale);
 5782     disp(0x0);
 5783   %}
 5784 %}
 5785 
 5786 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5787 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5788 %{
 5789   constraint(ALLOC_IN_RC(ptr_reg));
 5790   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5791 
 5792   op_cost(10);
 5793   format %{"[$reg + $off + $lreg << $scale]" %}
 5794   interface(MEMORY_INTER) %{
 5795     base($reg);
 5796     index($lreg);
 5797     scale($scale);
 5798     disp($off);
 5799   %}
 5800 %}
 5801 
 5802 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5803 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5804 %{
 5805   constraint(ALLOC_IN_RC(ptr_reg));
 5806   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5807   match(AddP (AddP reg (ConvI2L idx)) off);
 5808 
 5809   op_cost(10);
 5810   format %{"[$reg + $off + $idx]" %}
 5811   interface(MEMORY_INTER) %{
 5812     base($reg);
 5813     index($idx);
 5814     scale(0x0);
 5815     disp($off);
 5816   %}
 5817 %}
 5818 
 5819 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5820 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5821 %{
 5822   constraint(ALLOC_IN_RC(ptr_reg));
 5823   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5824   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5825 
 5826   op_cost(10);
 5827   format %{"[$reg + $off + $idx << $scale]" %}
 5828   interface(MEMORY_INTER) %{
 5829     base($reg);
 5830     index($idx);
 5831     scale($scale);
 5832     disp($off);
 5833   %}
 5834 %}
 5835 
 5836 // Indirect Narrow Oop Operand
 5837 operand indCompressedOop(rRegN reg) %{
 5838   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5839   constraint(ALLOC_IN_RC(ptr_reg));
 5840   match(DecodeN reg);
 5841 
 5842   op_cost(10);
 5843   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5844   interface(MEMORY_INTER) %{
 5845     base(0xc); // R12
 5846     index($reg);
 5847     scale(0x3);
 5848     disp(0x0);
 5849   %}
 5850 %}
 5851 
 5852 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a base
// register, so we can't free R12 even when CompressedOops::base() == nullptr.
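// For example, with CompressedOops::shift() == 3 the operand below encodes
// [R12 + $reg << 3 + $off], i.e. heap_base + (narrow_oop << 3) + off, which is
// the decoded oop address plus the field offset.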
 5855 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5856   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5857   constraint(ALLOC_IN_RC(ptr_reg));
 5858   match(AddP (DecodeN reg) off);
 5859 
 5860   op_cost(10);
 5861   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5862   interface(MEMORY_INTER) %{
 5863     base(0xc); // R12
 5864     index($reg);
 5865     scale(0x3);
 5866     disp($off);
 5867   %}
 5868 %}
 5869 
 5870 // Indirect Memory Operand
 5871 operand indirectNarrow(rRegN reg)
 5872 %{
 5873   predicate(CompressedOops::shift() == 0);
 5874   constraint(ALLOC_IN_RC(ptr_reg));
 5875   match(DecodeN reg);
 5876 
 5877   format %{ "[$reg]" %}
 5878   interface(MEMORY_INTER) %{
 5879     base($reg);
 5880     index(0x4);
 5881     scale(0x0);
 5882     disp(0x0);
 5883   %}
 5884 %}
 5885 
 5886 // Indirect Memory Plus Short Offset Operand
 5887 operand indOffset8Narrow(rRegN reg, immL8 off)
 5888 %{
 5889   predicate(CompressedOops::shift() == 0);
 5890   constraint(ALLOC_IN_RC(ptr_reg));
 5891   match(AddP (DecodeN reg) off);
 5892 
 5893   format %{ "[$reg + $off (8-bit)]" %}
 5894   interface(MEMORY_INTER) %{
 5895     base($reg);
 5896     index(0x4);
 5897     scale(0x0);
 5898     disp($off);
 5899   %}
 5900 %}
 5901 
 5902 // Indirect Memory Plus Long Offset Operand
 5903 operand indOffset32Narrow(rRegN reg, immL32 off)
 5904 %{
 5905   predicate(CompressedOops::shift() == 0);
 5906   constraint(ALLOC_IN_RC(ptr_reg));
 5907   match(AddP (DecodeN reg) off);
 5908 
 5909   format %{ "[$reg + $off (32-bit)]" %}
 5910   interface(MEMORY_INTER) %{
 5911     base($reg);
 5912     index(0x4);
 5913     scale(0x0);
 5914     disp($off);
 5915   %}
 5916 %}
 5917 
 5918 // Indirect Memory Plus Index Register Plus Offset Operand
 5919 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5920 %{
 5921   predicate(CompressedOops::shift() == 0);
 5922   constraint(ALLOC_IN_RC(ptr_reg));
 5923   match(AddP (AddP (DecodeN reg) lreg) off);
 5924 
 5925   op_cost(10);
 5926   format %{"[$reg + $off + $lreg]" %}
 5927   interface(MEMORY_INTER) %{
 5928     base($reg);
 5929     index($lreg);
 5930     scale(0x0);
 5931     disp($off);
 5932   %}
 5933 %}
 5934 
// Indirect Memory Plus Index Register Operand
 5936 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5937 %{
 5938   predicate(CompressedOops::shift() == 0);
 5939   constraint(ALLOC_IN_RC(ptr_reg));
 5940   match(AddP (DecodeN reg) lreg);
 5941 
 5942   op_cost(10);
 5943   format %{"[$reg + $lreg]" %}
 5944   interface(MEMORY_INTER) %{
 5945     base($reg);
 5946     index($lreg);
 5947     scale(0x0);
 5948     disp(0x0);
 5949   %}
 5950 %}
 5951 
 5952 // Indirect Memory Times Scale Plus Index Register
 5953 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5954 %{
 5955   predicate(CompressedOops::shift() == 0);
 5956   constraint(ALLOC_IN_RC(ptr_reg));
 5957   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5958 
 5959   op_cost(10);
 5960   format %{"[$reg + $lreg << $scale]" %}
 5961   interface(MEMORY_INTER) %{
 5962     base($reg);
 5963     index($lreg);
 5964     scale($scale);
 5965     disp(0x0);
 5966   %}
 5967 %}
 5968 
 5969 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5970 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5971 %{
 5972   predicate(CompressedOops::shift() == 0);
 5973   constraint(ALLOC_IN_RC(ptr_reg));
 5974   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5975 
 5976   op_cost(10);
 5977   format %{"[$reg + $off + $lreg << $scale]" %}
 5978   interface(MEMORY_INTER) %{
 5979     base($reg);
 5980     index($lreg);
 5981     scale($scale);
 5982     disp($off);
 5983   %}
 5984 %}
 5985 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5987 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5988 %{
 5989   constraint(ALLOC_IN_RC(ptr_reg));
 5990   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5991   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5992 
 5993   op_cost(10);
 5994   format %{"[$reg + $off + $idx]" %}
 5995   interface(MEMORY_INTER) %{
 5996     base($reg);
 5997     index($idx);
 5998     scale(0x0);
 5999     disp($off);
 6000   %}
 6001 %}
 6002 
 6003 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 6004 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 6005 %{
 6006   constraint(ALLOC_IN_RC(ptr_reg));
 6007   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 6008   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 6009 
 6010   op_cost(10);
 6011   format %{"[$reg + $off + $idx << $scale]" %}
 6012   interface(MEMORY_INTER) %{
 6013     base($reg);
 6014     index($idx);
 6015     scale($scale);
 6016     disp($off);
 6017   %}
 6018 %}
 6019 
 6020 //----------Special Memory Operands--------------------------------------------
 6021 // Stack Slot Operand - This operand is used for loading and storing temporary
 6022 //                      values on the stack where a match requires a value to
 6023 //                      flow through memory.
 6024 operand stackSlotP(sRegP reg)
 6025 %{
 6026   constraint(ALLOC_IN_RC(stack_slots));
 6027   // No match rule because this operand is only generated in matching
 6028 
 6029   format %{ "[$reg]" %}
 6030   interface(MEMORY_INTER) %{
 6031     base(0x4);   // RSP
 6032     index(0x4);  // No Index
 6033     scale(0x0);  // No Scale
 6034     disp($reg);  // Stack Offset
 6035   %}
 6036 %}
 6037 
 6038 operand stackSlotI(sRegI reg)
 6039 %{
 6040   constraint(ALLOC_IN_RC(stack_slots));
 6041   // No match rule because this operand is only generated in matching
 6042 
 6043   format %{ "[$reg]" %}
 6044   interface(MEMORY_INTER) %{
 6045     base(0x4);   // RSP
 6046     index(0x4);  // No Index
 6047     scale(0x0);  // No Scale
 6048     disp($reg);  // Stack Offset
 6049   %}
 6050 %}
 6051 
 6052 operand stackSlotF(sRegF reg)
 6053 %{
 6054   constraint(ALLOC_IN_RC(stack_slots));
 6055   // No match rule because this operand is only generated in matching
 6056 
 6057   format %{ "[$reg]" %}
 6058   interface(MEMORY_INTER) %{
 6059     base(0x4);   // RSP
 6060     index(0x4);  // No Index
 6061     scale(0x0);  // No Scale
 6062     disp($reg);  // Stack Offset
 6063   %}
 6064 %}
 6065 
 6066 operand stackSlotD(sRegD reg)
 6067 %{
 6068   constraint(ALLOC_IN_RC(stack_slots));
 6069   // No match rule because this operand is only generated in matching
 6070 
 6071   format %{ "[$reg]" %}
 6072   interface(MEMORY_INTER) %{
 6073     base(0x4);   // RSP
 6074     index(0x4);  // No Index
 6075     scale(0x0);  // No Scale
 6076     disp($reg);  // Stack Offset
 6077   %}
 6078 %}
 6079 operand stackSlotL(sRegL reg)
 6080 %{
 6081   constraint(ALLOC_IN_RC(stack_slots));
 6082   // No match rule because this operand is only generated in matching
 6083 
 6084   format %{ "[$reg]" %}
 6085   interface(MEMORY_INTER) %{
 6086     base(0x4);   // RSP
 6087     index(0x4);  // No Index
 6088     scale(0x0);  // No Scale
 6089     disp($reg);  // Stack Offset
 6090   %}
 6091 %}
 6092 
 6093 //----------Conditional Branch Operands----------------------------------------
 6094 // Comparison Op  - This is the operation of the comparison, and is limited to
 6095 //                  the following set of codes:
 6096 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6097 //
 6098 // Other attributes of the comparison, such as unsignedness, are specified
 6099 // by the comparison instruction that sets a condition code flags register.
 6100 // That result is represented by a flags operand whose subtype is appropriate
 6101 // to the unsignedness (etc.) of the comparison.
 6102 //
 6103 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6104 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6105 // by matching a specific subtype of Bool operand below, such as cmpOpU.
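//
// Illustrative sketch only (commented out, not a live rule): a conditional
// branch ties the two together by matching an If node whose condition is one
// of the cmpOp* operands below and whose flags come from a preceding compare,
// roughly:
//
//   instruct jmpCon_example(cmpOp cop, rFlagsReg cr, label labl)
//   %{
//     match(If cop cr);
//     effect(USE labl);
//     format %{ "j$cop   $labl" %}
//     ins_encode %{
//       __ jcc((Assembler::Condition)($cop$$cmpcode), *($labl$$label));
//     %}
//     ins_pipe(pipe_jcc);
//   %}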
 6106 
 6107 // Comparison Code
 6108 operand cmpOp()
 6109 %{
 6110   match(Bool);
 6111 
 6112   format %{ "" %}
 6113   interface(COND_INTER) %{
 6114     equal(0x4, "e");
 6115     not_equal(0x5, "ne");
 6116     less(0xc, "l");
 6117     greater_equal(0xd, "ge");
 6118     less_equal(0xe, "le");
 6119     greater(0xf, "g");
 6120     overflow(0x0, "o");
 6121     no_overflow(0x1, "no");
 6122   %}
 6123 %}
 6124 
 6125 // Comparison Code, unsigned compare.  Used by FP also, with
 6126 // C2 (unordered) turned into GT or LT already.  The other bits
 6127 // C0 and C3 are turned into Carry & Zero flags.
 6128 operand cmpOpU()
 6129 %{
 6130   match(Bool);
 6131 
 6132   format %{ "" %}
 6133   interface(COND_INTER) %{
 6134     equal(0x4, "e");
 6135     not_equal(0x5, "ne");
 6136     less(0x2, "b");
 6137     greater_equal(0x3, "ae");
 6138     less_equal(0x6, "be");
 6139     greater(0x7, "a");
 6140     overflow(0x0, "o");
 6141     no_overflow(0x1, "no");
 6142   %}
 6143 %}
 6144 
 6145 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set (UCOMISS/UCOMISD
// report both "equal" and "unordered" with ZF = 1), so we don't need to use
// cmpOpUCF2 for eq/ne.
 6149 operand cmpOpUCF() %{
 6150   match(Bool);
 6151   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6152             (n->as_Bool()->_test._test == BoolTest::lt ||
 6153              n->as_Bool()->_test._test == BoolTest::ge ||
 6154              n->as_Bool()->_test._test == BoolTest::le ||
 6155              n->as_Bool()->_test._test == BoolTest::gt ||
 6156              n->in(1)->in(1) == n->in(1)->in(2)));
 6157   format %{ "" %}
 6158   interface(COND_INTER) %{
 6159     equal(0xb, "np");
 6160     not_equal(0xa, "p");
 6161     less(0x2, "b");
 6162     greater_equal(0x3, "ae");
 6163     less_equal(0x6, "be");
 6164     greater(0x7, "a");
 6165     overflow(0x0, "o");
 6166     no_overflow(0x1, "no");
 6167   %}
 6168 %}
 6169 
 6170 
 6171 // Floating comparisons that can be fixed up with extra conditional jumps
 6172 operand cmpOpUCF2() %{
 6173   match(Bool);
 6174   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6175             (n->as_Bool()->_test._test == BoolTest::ne ||
 6176              n->as_Bool()->_test._test == BoolTest::eq) &&
 6177             n->in(1)->in(1) != n->in(1)->in(2));
 6178   format %{ "" %}
 6179   interface(COND_INTER) %{
 6180     equal(0x4, "e");
 6181     not_equal(0x5, "ne");
 6182     less(0x2, "b");
 6183     greater_equal(0x3, "ae");
 6184     less_equal(0x6, "be");
 6185     greater(0x7, "a");
 6186     overflow(0x0, "o");
 6187     no_overflow(0x1, "no");
 6188   %}
 6189 %}
 6190 
 6191 
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing them to an
// instruction, because the preceding comparison might be based on a three-way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
 6198 operand cmpOpUCFE()
 6199 %{
 6200   match(Bool);
 6201   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6202             (n->as_Bool()->_test._test == BoolTest::ne ||
 6203              n->as_Bool()->_test._test == BoolTest::eq ||
 6204              n->as_Bool()->_test._test == BoolTest::lt ||
 6205              n->as_Bool()->_test._test == BoolTest::ge ||
 6206              n->as_Bool()->_test._test == BoolTest::le ||
 6207              n->as_Bool()->_test._test == BoolTest::gt));
 6208 
 6209   format %{ "" %}
 6210   interface(COND_INTER) %{
 6211     equal(0x4, "e");
 6212     not_equal(0x5, "ne");
 6213     less(0x2, "b");
 6214     greater_equal(0x3, "ae");
 6215     less_equal(0x6, "be");
 6216     greater(0x7, "a");
 6217     overflow(0x0, "o");
 6218     no_overflow(0x1, "no");
 6219   %}
 6220 %}
 6221 
// Operands for bound floating point register arguments
 6223 operand rxmm0() %{
 6224   constraint(ALLOC_IN_RC(xmm0_reg));
 6225   match(VecX);
 6226   format%{%}
 6227   interface(REG_INTER);
 6228 %}
 6229 
 6230 // Vectors
 6231 
 6232 // Dummy generic vector class. Should be used for all vector operands.
 6233 // Replaced with vec[SDXYZ] during post-selection pass.
 6234 operand vec() %{
 6235   constraint(ALLOC_IN_RC(dynamic));
 6236   match(VecX);
 6237   match(VecY);
 6238   match(VecZ);
 6239   match(VecS);
 6240   match(VecD);
 6241 
 6242   format %{ %}
 6243   interface(REG_INTER);
 6244 %}
 6245 
 6246 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6247 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6248 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6249 // runtime code generation via reg_class_dynamic.
 6250 operand legVec() %{
 6251   constraint(ALLOC_IN_RC(dynamic));
 6252   match(VecX);
 6253   match(VecY);
 6254   match(VecZ);
 6255   match(VecS);
 6256   match(VecD);
 6257 
 6258   format %{ %}
 6259   interface(REG_INTER);
 6260 %}
 6261 
 6262 // Replaces vec during post-selection cleanup. See above.
 6263 operand vecS() %{
 6264   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6265   match(VecS);
 6266 
 6267   format %{ %}
 6268   interface(REG_INTER);
 6269 %}
 6270 
 6271 // Replaces legVec during post-selection cleanup. See above.
 6272 operand legVecS() %{
 6273   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6274   match(VecS);
 6275 
 6276   format %{ %}
 6277   interface(REG_INTER);
 6278 %}
 6279 
 6280 // Replaces vec during post-selection cleanup. See above.
 6281 operand vecD() %{
 6282   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6283   match(VecD);
 6284 
 6285   format %{ %}
 6286   interface(REG_INTER);
 6287 %}
 6288 
 6289 // Replaces legVec during post-selection cleanup. See above.
 6290 operand legVecD() %{
 6291   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6292   match(VecD);
 6293 
 6294   format %{ %}
 6295   interface(REG_INTER);
 6296 %}
 6297 
 6298 // Replaces vec during post-selection cleanup. See above.
 6299 operand vecX() %{
 6300   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6301   match(VecX);
 6302 
 6303   format %{ %}
 6304   interface(REG_INTER);
 6305 %}
 6306 
 6307 // Replaces legVec during post-selection cleanup. See above.
 6308 operand legVecX() %{
 6309   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6310   match(VecX);
 6311 
 6312   format %{ %}
 6313   interface(REG_INTER);
 6314 %}
 6315 
 6316 // Replaces vec during post-selection cleanup. See above.
 6317 operand vecY() %{
 6318   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6319   match(VecY);
 6320 
 6321   format %{ %}
 6322   interface(REG_INTER);
 6323 %}
 6324 
 6325 // Replaces legVec during post-selection cleanup. See above.
 6326 operand legVecY() %{
 6327   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6328   match(VecY);
 6329 
 6330   format %{ %}
 6331   interface(REG_INTER);
 6332 %}
 6333 
 6334 // Replaces vec during post-selection cleanup. See above.
 6335 operand vecZ() %{
 6336   constraint(ALLOC_IN_RC(vectorz_reg));
 6337   match(VecZ);
 6338 
 6339   format %{ %}
 6340   interface(REG_INTER);
 6341 %}
 6342 
 6343 // Replaces legVec during post-selection cleanup. See above.
 6344 operand legVecZ() %{
 6345   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6346   match(VecZ);
 6347 
 6348   format %{ %}
 6349   interface(REG_INTER);
 6350 %}
 6351 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands (see the illustrative sketch after the
// memory opclass below).
 6358 
 6359 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6360                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6361                indCompressedOop, indCompressedOopOffset,
 6362                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6363                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6364                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
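// Illustrative sketch only (commented out, not a live rule): because "memory"
// is an opclass, a single rule such as the hypothetical one below covers every
// addressing form listed above; the matcher picks the concrete operand at each
// use site.
//
//   instruct loadI_example(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));
//     format %{ "movl    $dst, $mem" %}
//     ins_encode %{
//       __ movl($dst$$Register, $mem$$Address);
//     %}
//     ins_pipe(ialu_reg_mem);
//   %}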
 6365 
 6366 //----------PIPELINE-----------------------------------------------------------
 6367 // Rules which define the behavior of the target architectures pipeline.
 6368 pipeline %{
 6369 
 6370 //----------ATTRIBUTES---------------------------------------------------------
 6371 attributes %{
  variable_size_instructions;        // Variable-size instructions
 6373   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6375   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6376   instruction_fetch_units = 1;       // of 16 bytes
 6377 %}
 6378 
 6379 //----------RESOURCES----------------------------------------------------------
 6380 // Resources are the functional units available to the machine
 6381 
 6382 // Generic P2/P3 pipeline
 6383 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6384 // 3 instructions decoded per cycle.
 6385 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
 6387 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6388            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6389            BR, FPU,
 6390            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6391 
 6392 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6393 // Pipeline Description specifies the stages in the machine's pipeline
 6394 
 6395 // Generic P2/P3 pipeline
 6396 pipe_desc(S0, S1, S2, S3, S4, S5);
 6397 
 6398 //----------PIPELINE CLASSES---------------------------------------------------
 6399 // Pipeline Classes describe the stages in which input and output are
 6400 // referenced by the hardware pipeline.
 6401 
 6402 // Naming convention: ialu or fpu
 6403 // Then: _reg
 6404 // Then: _reg if there is a 2nd register
 6405 // Then: _long if it's a pair of instructions implementing a long
 6406 // Then: _fat if it requires the big decoder
 6407 //   Or: _mem if it requires the big decoder and a memory unit.
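//
// For example (reading the convention above): ialu_reg_mem is an integer ALU
// operation with a register destination and a memory source, so it needs the
// big decoder (D0) plus a memory unit, as its definition below shows.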
 6408 
 6409 // Integer ALU reg operation
 6410 pipe_class ialu_reg(rRegI dst)
 6411 %{
 6412     single_instruction;
 6413     dst    : S4(write);
 6414     dst    : S3(read);
 6415     DECODE : S0;        // any decoder
 6416     ALU    : S3;        // any alu
 6417 %}
 6418 
 6419 // Long ALU reg operation
 6420 pipe_class ialu_reg_long(rRegL dst)
 6421 %{
 6422     instruction_count(2);
 6423     dst    : S4(write);
 6424     dst    : S3(read);
 6425     DECODE : S0(2);     // any 2 decoders
 6426     ALU    : S3(2);     // both alus
 6427 %}
 6428 
 6429 // Integer ALU reg operation using big decoder
 6430 pipe_class ialu_reg_fat(rRegI dst)
 6431 %{
 6432     single_instruction;
 6433     dst    : S4(write);
 6434     dst    : S3(read);
 6435     D0     : S0;        // big decoder only
 6436     ALU    : S3;        // any alu
 6437 %}
 6438 
 6439 // Integer ALU reg-reg operation
 6440 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6441 %{
 6442     single_instruction;
 6443     dst    : S4(write);
 6444     src    : S3(read);
 6445     DECODE : S0;        // any decoder
 6446     ALU    : S3;        // any alu
 6447 %}
 6448 
 6449 // Integer ALU reg-reg operation
 6450 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6451 %{
 6452     single_instruction;
 6453     dst    : S4(write);
 6454     src    : S3(read);
 6455     D0     : S0;        // big decoder only
 6456     ALU    : S3;        // any alu
 6457 %}
 6458 
 6459 // Integer ALU reg-mem operation
 6460 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6461 %{
 6462     single_instruction;
 6463     dst    : S5(write);
 6464     mem    : S3(read);
 6465     D0     : S0;        // big decoder only
 6466     ALU    : S4;        // any alu
 6467     MEM    : S3;        // any mem
 6468 %}
 6469 
 6470 // Integer mem operation (prefetch)
 6471 pipe_class ialu_mem(memory mem)
 6472 %{
 6473     single_instruction;
 6474     mem    : S3(read);
 6475     D0     : S0;        // big decoder only
 6476     MEM    : S3;        // any mem
 6477 %}
 6478 
 6479 // Integer Store to Memory
 6480 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6481 %{
 6482     single_instruction;
 6483     mem    : S3(read);
 6484     src    : S5(read);
 6485     D0     : S0;        // big decoder only
 6486     ALU    : S4;        // any alu
 6487     MEM    : S3;
 6488 %}
 6489 
 6490 // // Long Store to Memory
 6491 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6492 // %{
 6493 //     instruction_count(2);
 6494 //     mem    : S3(read);
 6495 //     src    : S5(read);
 6496 //     D0     : S0(2);          // big decoder only; twice
 6497 //     ALU    : S4(2);     // any 2 alus
 6498 //     MEM    : S3(2);  // Both mems
 6499 // %}
 6500 
 6501 // Integer Store to Memory
 6502 pipe_class ialu_mem_imm(memory mem)
 6503 %{
 6504     single_instruction;
 6505     mem    : S3(read);
 6506     D0     : S0;        // big decoder only
 6507     ALU    : S4;        // any alu
 6508     MEM    : S3;
 6509 %}
 6510 
 6511 // Integer ALU0 reg-reg operation
 6512 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6513 %{
 6514     single_instruction;
 6515     dst    : S4(write);
 6516     src    : S3(read);
 6517     D0     : S0;        // Big decoder only
 6518     ALU0   : S3;        // only alu0
 6519 %}
 6520 
 6521 // Integer ALU0 reg-mem operation
 6522 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6523 %{
 6524     single_instruction;
 6525     dst    : S5(write);
 6526     mem    : S3(read);
 6527     D0     : S0;        // big decoder only
 6528     ALU0   : S4;        // ALU0 only
 6529     MEM    : S3;        // any mem
 6530 %}
 6531 
 6532 // Integer ALU reg-reg operation
 6533 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6534 %{
 6535     single_instruction;
 6536     cr     : S4(write);
 6537     src1   : S3(read);
 6538     src2   : S3(read);
 6539     DECODE : S0;        // any decoder
 6540     ALU    : S3;        // any alu
 6541 %}
 6542 
 6543 // Integer ALU reg-imm operation
 6544 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6545 %{
 6546     single_instruction;
 6547     cr     : S4(write);
 6548     src1   : S3(read);
 6549     DECODE : S0;        // any decoder
 6550     ALU    : S3;        // any alu
 6551 %}
 6552 
 6553 // Integer ALU reg-mem operation
 6554 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6555 %{
 6556     single_instruction;
 6557     cr     : S4(write);
 6558     src1   : S3(read);
 6559     src2   : S3(read);
 6560     D0     : S0;        // big decoder only
 6561     ALU    : S4;        // any alu
 6562     MEM    : S3;
 6563 %}
 6564 
 6565 // Conditional move reg-reg
 6566 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6567 %{
 6568     instruction_count(4);
 6569     y      : S4(read);
 6570     q      : S3(read);
 6571     p      : S3(read);
 6572     DECODE : S0(4);     // any decoder
 6573 %}
 6574 
 6575 // Conditional move reg-reg
 6576 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6577 %{
 6578     single_instruction;
 6579     dst    : S4(write);
 6580     src    : S3(read);
 6581     cr     : S3(read);
 6582     DECODE : S0;        // any decoder
 6583 %}
 6584 
 6585 // Conditional move reg-mem
 6586 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6587 %{
 6588     single_instruction;
 6589     dst    : S4(write);
 6590     src    : S3(read);
 6591     cr     : S3(read);
 6592     DECODE : S0;        // any decoder
 6593     MEM    : S3;
 6594 %}
 6595 
 6596 // Conditional move reg-reg long
 6597 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6598 %{
 6599     single_instruction;
 6600     dst    : S4(write);
 6601     src    : S3(read);
 6602     cr     : S3(read);
 6603     DECODE : S0(2);     // any 2 decoders
 6604 %}
 6605 
 6606 // Float reg-reg operation
 6607 pipe_class fpu_reg(regD dst)
 6608 %{
 6609     instruction_count(2);
 6610     dst    : S3(read);
 6611     DECODE : S0(2);     // any 2 decoders
 6612     FPU    : S3;
 6613 %}
 6614 
 6615 // Float reg-reg operation
 6616 pipe_class fpu_reg_reg(regD dst, regD src)
 6617 %{
 6618     instruction_count(2);
 6619     dst    : S4(write);
 6620     src    : S3(read);
 6621     DECODE : S0(2);     // any 2 decoders
 6622     FPU    : S3;
 6623 %}
 6624 
 6625 // Float reg-reg operation
 6626 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6627 %{
 6628     instruction_count(3);
 6629     dst    : S4(write);
 6630     src1   : S3(read);
 6631     src2   : S3(read);
 6632     DECODE : S0(3);     // any 3 decoders
 6633     FPU    : S3(2);
 6634 %}
 6635 
 6636 // Float reg-reg operation
 6637 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6638 %{
 6639     instruction_count(4);
 6640     dst    : S4(write);
 6641     src1   : S3(read);
 6642     src2   : S3(read);
 6643     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6645     FPU    : S3(2);
 6646 %}
 6647 
 6648 // Float reg-reg operation
 6649 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6650 %{
 6651     instruction_count(4);
 6652     dst    : S4(write);
 6653     src1   : S3(read);
 6654     src2   : S3(read);
 6655     src3   : S3(read);
 6656     DECODE : S1(3);     // any 3 decoders
 6657     D0     : S0;        // Big decoder only
 6658     FPU    : S3(2);
 6659     MEM    : S3;
 6660 %}
 6661 
 6662 // Float reg-mem operation
 6663 pipe_class fpu_reg_mem(regD dst, memory mem)
 6664 %{
 6665     instruction_count(2);
 6666     dst    : S5(write);
 6667     mem    : S3(read);
 6668     D0     : S0;        // big decoder only
 6669     DECODE : S1;        // any decoder for FPU POP
 6670     FPU    : S4;
 6671     MEM    : S3;        // any mem
 6672 %}
 6673 
 6674 // Float reg-mem operation
 6675 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6676 %{
 6677     instruction_count(3);
 6678     dst    : S5(write);
 6679     src1   : S3(read);
 6680     mem    : S3(read);
 6681     D0     : S0;        // big decoder only
 6682     DECODE : S1(2);     // any decoder for FPU POP
 6683     FPU    : S4;
 6684     MEM    : S3;        // any mem
 6685 %}
 6686 
 6687 // Float mem-reg operation
 6688 pipe_class fpu_mem_reg(memory mem, regD src)
 6689 %{
 6690     instruction_count(2);
 6691     src    : S5(read);
 6692     mem    : S3(read);
 6693     DECODE : S0;        // any decoder for FPU PUSH
 6694     D0     : S1;        // big decoder only
 6695     FPU    : S4;
 6696     MEM    : S3;        // any mem
 6697 %}
 6698 
 6699 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6700 %{
 6701     instruction_count(3);
 6702     src1   : S3(read);
 6703     src2   : S3(read);
 6704     mem    : S3(read);
 6705     DECODE : S0(2);     // any decoder for FPU PUSH
 6706     D0     : S1;        // big decoder only
 6707     FPU    : S4;
 6708     MEM    : S3;        // any mem
 6709 %}
 6710 
 6711 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6712 %{
 6713     instruction_count(3);
 6714     src1   : S3(read);
 6715     src2   : S3(read);
 6716     mem    : S4(read);
 6717     DECODE : S0;        // any decoder for FPU PUSH
 6718     D0     : S0(2);     // big decoder only
 6719     FPU    : S4;
 6720     MEM    : S3(2);     // any mem
 6721 %}
 6722 
 6723 pipe_class fpu_mem_mem(memory dst, memory src1)
 6724 %{
 6725     instruction_count(2);
 6726     src1   : S3(read);
 6727     dst    : S4(read);
 6728     D0     : S0(2);     // big decoder only
 6729     MEM    : S3(2);     // any mem
 6730 %}
 6731 
 6732 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6733 %{
 6734     instruction_count(3);
 6735     src1   : S3(read);
 6736     src2   : S3(read);
 6737     dst    : S4(read);
 6738     D0     : S0(3);     // big decoder only
 6739     FPU    : S4;
 6740     MEM    : S3(3);     // any mem
 6741 %}
 6742 
 6743 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6744 %{
 6745     instruction_count(3);
 6746     src1   : S4(read);
 6747     mem    : S4(read);
 6748     DECODE : S0;        // any decoder for FPU PUSH
 6749     D0     : S0(2);     // big decoder only
 6750     FPU    : S4;
 6751     MEM    : S3(2);     // any mem
 6752 %}
 6753 
 6754 // Float load constant
 6755 pipe_class fpu_reg_con(regD dst)
 6756 %{
 6757     instruction_count(2);
 6758     dst    : S5(write);
 6759     D0     : S0;        // big decoder only for the load
 6760     DECODE : S1;        // any decoder for FPU POP
 6761     FPU    : S4;
 6762     MEM    : S3;        // any mem
 6763 %}
 6764 
 6765 // Float load constant
 6766 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6767 %{
 6768     instruction_count(3);
 6769     dst    : S5(write);
 6770     src    : S3(read);
 6771     D0     : S0;        // big decoder only for the load
 6772     DECODE : S1(2);     // any decoder for FPU POP
 6773     FPU    : S4;
 6774     MEM    : S3;        // any mem
 6775 %}
 6776 
// Unconditional branch
 6778 pipe_class pipe_jmp(label labl)
 6779 %{
 6780     single_instruction;
 6781     BR   : S3;
 6782 %}
 6783 
 6784 // Conditional branch
 6785 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6786 %{
 6787     single_instruction;
 6788     cr    : S1(read);
 6789     BR    : S3;
 6790 %}
 6791 
 6792 // Allocation idiom
 6793 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6794 %{
 6795     instruction_count(1); force_serialization;
 6796     fixed_latency(6);
 6797     heap_ptr : S3(read);
 6798     DECODE   : S0(3);
 6799     D0       : S2;
 6800     MEM      : S3;
 6801     ALU      : S3(2);
 6802     dst      : S5(write);
 6803     BR       : S5;
 6804 %}
 6805 
 6806 // Generic big/slow expanded idiom
 6807 pipe_class pipe_slow()
 6808 %{
 6809     instruction_count(10); multiple_bundles; force_serialization;
 6810     fixed_latency(100);
 6811     D0  : S0(2);
 6812     MEM : S3(2);
 6813 %}
 6814 
 6815 // The real do-nothing guy
 6816 pipe_class empty()
 6817 %{
 6818     instruction_count(0);
 6819 %}
 6820 
 6821 // Define the class for the Nop node
 6822 define
 6823 %{
 6824    MachNop = empty;
 6825 %}
 6826 
 6827 %}
 6828 
 6829 //----------INSTRUCTIONS-------------------------------------------------------
 6830 //
 6831 // match      -- States which machine-independent subtree may be replaced
 6832 //               by this instruction.
 6833 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6834 //               selection to identify a minimum cost tree of machine
 6835 //               instructions that matches a tree of machine-independent
 6836 //               instructions.
 6837 // format     -- A string providing the disassembly for this instruction.
 6838 //               The value of an instruction's operand may be inserted
 6839 //               by referring to it with a '$' prefix.
 6840 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6841 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6843 //               indicate the type of machine instruction, while secondary
 6844 //               and tertiary are often used for prefix options or addressing
 6845 //               modes.
 6846 // ins_encode -- A list of encode classes with parameters. The encode class
 6847 //               name must have been defined in an 'enc_class' specification
 6848 //               in the encode section of the architecture description.
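//
// Illustrative skeleton only (commented out, not a live rule) showing how
// these pieces fit together in a typical definition; the real rules follow.
//
//   instruct addI_rReg_example(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));   // machine-independent subtree replaced
//     effect(KILL cr);                 // extra effects not in the match rule
//     ins_cost(150);                   // cost used by instruction selection
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}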
 6849 
 6850 // ============================================================================
 6851 
 6852 instruct ShouldNotReachHere() %{
 6853   match(Halt);
 6854   format %{ "stop\t# ShouldNotReachHere" %}
 6855   ins_encode %{
 6856     if (is_reachable()) {
 6857       const char* str = __ code_string(_halt_reason);
 6858       __ stop(str);
 6859     }
 6860   %}
 6861   ins_pipe(pipe_slow);
 6862 %}
 6863 
 6864 // ============================================================================
 6865 
 6866 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6867 // Load Float
 6868 instruct MoveF2VL(vlRegF dst, regF src) %{
 6869   match(Set dst src);
 6870   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6871   ins_encode %{
 6872     ShouldNotReachHere();
 6873   %}
 6874   ins_pipe( fpu_reg_reg );
 6875 %}
 6876 
 6877 // Load Float
 6878 instruct MoveF2LEG(legRegF dst, regF src) %{
 6879   match(Set dst src);
 6880   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6881   ins_encode %{
 6882     ShouldNotReachHere();
 6883   %}
 6884   ins_pipe( fpu_reg_reg );
 6885 %}
 6886 
 6887 // Load Float
 6888 instruct MoveVL2F(regF dst, vlRegF src) %{
 6889   match(Set dst src);
 6890   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6891   ins_encode %{
 6892     ShouldNotReachHere();
 6893   %}
 6894   ins_pipe( fpu_reg_reg );
 6895 %}
 6896 
 6897 // Load Float
 6898 instruct MoveLEG2F(regF dst, legRegF src) %{
 6899   match(Set dst src);
 6900   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6901   ins_encode %{
 6902     ShouldNotReachHere();
 6903   %}
 6904   ins_pipe( fpu_reg_reg );
 6905 %}
 6906 
 6907 // Load Double
 6908 instruct MoveD2VL(vlRegD dst, regD src) %{
 6909   match(Set dst src);
 6910   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6911   ins_encode %{
 6912     ShouldNotReachHere();
 6913   %}
 6914   ins_pipe( fpu_reg_reg );
 6915 %}
 6916 
 6917 // Load Double
 6918 instruct MoveD2LEG(legRegD dst, regD src) %{
 6919   match(Set dst src);
 6920   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6921   ins_encode %{
 6922     ShouldNotReachHere();
 6923   %}
 6924   ins_pipe( fpu_reg_reg );
 6925 %}
 6926 
 6927 // Load Double
 6928 instruct MoveVL2D(regD dst, vlRegD src) %{
 6929   match(Set dst src);
 6930   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6931   ins_encode %{
 6932     ShouldNotReachHere();
 6933   %}
 6934   ins_pipe( fpu_reg_reg );
 6935 %}
 6936 
 6937 // Load Double
 6938 instruct MoveLEG2D(regD dst, legRegD src) %{
 6939   match(Set dst src);
 6940   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6941   ins_encode %{
 6942     ShouldNotReachHere();
 6943   %}
 6944   ins_pipe( fpu_reg_reg );
 6945 %}
 6946 
 6947 //----------Load/Store/Move Instructions---------------------------------------
 6948 //----------Load Instructions--------------------------------------------------
 6949 
 6950 // Load Byte (8 bit signed)
 6951 instruct loadB(rRegI dst, memory mem)
 6952 %{
 6953   match(Set dst (LoadB mem));
 6954 
 6955   ins_cost(125);
 6956   format %{ "movsbl  $dst, $mem\t# byte" %}
 6957 
 6958   ins_encode %{
 6959     __ movsbl($dst$$Register, $mem$$Address);
 6960   %}
 6961 
 6962   ins_pipe(ialu_reg_mem);
 6963 %}
 6964 
 6965 // Load Byte (8 bit signed) into Long Register
 6966 instruct loadB2L(rRegL dst, memory mem)
 6967 %{
 6968   match(Set dst (ConvI2L (LoadB mem)));
 6969 
 6970   ins_cost(125);
 6971   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6972 
 6973   ins_encode %{
 6974     __ movsbq($dst$$Register, $mem$$Address);
 6975   %}
 6976 
 6977   ins_pipe(ialu_reg_mem);
 6978 %}
 6979 
 6980 // Load Unsigned Byte (8 bit UNsigned)
 6981 instruct loadUB(rRegI dst, memory mem)
 6982 %{
 6983   match(Set dst (LoadUB mem));
 6984 
 6985   ins_cost(125);
 6986   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6987 
 6988   ins_encode %{
 6989     __ movzbl($dst$$Register, $mem$$Address);
 6990   %}
 6991 
 6992   ins_pipe(ialu_reg_mem);
 6993 %}
 6994 
 6995 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6996 instruct loadUB2L(rRegL dst, memory mem)
 6997 %{
 6998   match(Set dst (ConvI2L (LoadUB mem)));
 6999 
 7000   ins_cost(125);
 7001   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 7002 
 7003   ins_encode %{
 7004     __ movzbq($dst$$Register, $mem$$Address);
 7005   %}
 7006 
 7007   ins_pipe(ialu_reg_mem);
 7008 %}
 7009 
 7010 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 7011 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7012   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 7013   effect(KILL cr);
 7014 
 7015   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 7016             "andl    $dst, right_n_bits($mask, 8)" %}
 7017   ins_encode %{
 7018     Register Rdst = $dst$$Register;
 7019     __ movzbq(Rdst, $mem$$Address);
 7020     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 7021   %}
 7022   ins_pipe(ialu_reg_mem);
 7023 %}
 7024 
 7025 // Load Short (16 bit signed)
 7026 instruct loadS(rRegI dst, memory mem)
 7027 %{
 7028   match(Set dst (LoadS mem));
 7029 
 7030   ins_cost(125);
 7031   format %{ "movswl $dst, $mem\t# short" %}
 7032 
 7033   ins_encode %{
 7034     __ movswl($dst$$Register, $mem$$Address);
 7035   %}
 7036 
 7037   ins_pipe(ialu_reg_mem);
 7038 %}
 7039 
 7040 // Load Short (16 bit signed) to Byte (8 bit signed)
 7041 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7042   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 7043 
 7044   ins_cost(125);
 7045   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 7046   ins_encode %{
 7047     __ movsbl($dst$$Register, $mem$$Address);
 7048   %}
 7049   ins_pipe(ialu_reg_mem);
 7050 %}
 7051 
 7052 // Load Short (16 bit signed) into Long Register
 7053 instruct loadS2L(rRegL dst, memory mem)
 7054 %{
 7055   match(Set dst (ConvI2L (LoadS mem)));
 7056 
 7057   ins_cost(125);
 7058   format %{ "movswq $dst, $mem\t# short -> long" %}
 7059 
 7060   ins_encode %{
 7061     __ movswq($dst$$Register, $mem$$Address);
 7062   %}
 7063 
 7064   ins_pipe(ialu_reg_mem);
 7065 %}
 7066 
 7067 // Load Unsigned Short/Char (16 bit UNsigned)
 7068 instruct loadUS(rRegI dst, memory mem)
 7069 %{
 7070   match(Set dst (LoadUS mem));
 7071 
 7072   ins_cost(125);
 7073   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7074 
 7075   ins_encode %{
 7076     __ movzwl($dst$$Register, $mem$$Address);
 7077   %}
 7078 
 7079   ins_pipe(ialu_reg_mem);
 7080 %}
 7081 
 7082 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7083 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7084   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7085 
 7086   ins_cost(125);
 7087   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7088   ins_encode %{
 7089     __ movsbl($dst$$Register, $mem$$Address);
 7090   %}
 7091   ins_pipe(ialu_reg_mem);
 7092 %}
 7093 
 7094 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7095 instruct loadUS2L(rRegL dst, memory mem)
 7096 %{
 7097   match(Set dst (ConvI2L (LoadUS mem)));
 7098 
 7099   ins_cost(125);
 7100   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7101 
 7102   ins_encode %{
 7103     __ movzwq($dst$$Register, $mem$$Address);
 7104   %}
 7105 
 7106   ins_pipe(ialu_reg_mem);
 7107 %}
 7108 
 7109 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7110 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7111   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7112 
 7113   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7114   ins_encode %{
 7115     __ movzbq($dst$$Register, $mem$$Address);
 7116   %}
 7117   ins_pipe(ialu_reg_mem);
 7118 %}
 7119 
 7120 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7121 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7122   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7123   effect(KILL cr);
 7124 
 7125   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7126             "andl    $dst, right_n_bits($mask, 16)" %}
 7127   ins_encode %{
 7128     Register Rdst = $dst$$Register;
 7129     __ movzwq(Rdst, $mem$$Address);
 7130     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7131   %}
 7132   ins_pipe(ialu_reg_mem);
 7133 %}
 7134 
 7135 // Load Integer
 7136 instruct loadI(rRegI dst, memory mem)
 7137 %{
 7138   match(Set dst (LoadI mem));
 7139 
 7140   ins_cost(125);
 7141   format %{ "movl    $dst, $mem\t# int" %}
 7142 
 7143   ins_encode %{
 7144     __ movl($dst$$Register, $mem$$Address);
 7145   %}
 7146 
 7147   ins_pipe(ialu_reg_mem);
 7148 %}
 7149 
 7150 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7151 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7152   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7153 
 7154   ins_cost(125);
 7155   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7156   ins_encode %{
 7157     __ movsbl($dst$$Register, $mem$$Address);
 7158   %}
 7159   ins_pipe(ialu_reg_mem);
 7160 %}
 7161 
 7162 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7163 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7164   match(Set dst (AndI (LoadI mem) mask));
 7165 
 7166   ins_cost(125);
 7167   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7168   ins_encode %{
 7169     __ movzbl($dst$$Register, $mem$$Address);
 7170   %}
 7171   ins_pipe(ialu_reg_mem);
 7172 %}
 7173 
 7174 // Load Integer (32 bit signed) to Short (16 bit signed)
 7175 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7176   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7177 
 7178   ins_cost(125);
 7179   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7180   ins_encode %{
 7181     __ movswl($dst$$Register, $mem$$Address);
 7182   %}
 7183   ins_pipe(ialu_reg_mem);
 7184 %}
 7185 
 7186 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7187 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7188   match(Set dst (AndI (LoadI mem) mask));
 7189 
 7190   ins_cost(125);
 7191   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7192   ins_encode %{
 7193     __ movzwl($dst$$Register, $mem$$Address);
 7194   %}
 7195   ins_pipe(ialu_reg_mem);
 7196 %}
 7197 
 7198 // Load Integer into Long Register
 7199 instruct loadI2L(rRegL dst, memory mem)
 7200 %{
 7201   match(Set dst (ConvI2L (LoadI mem)));
 7202 
 7203   ins_cost(125);
 7204   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7205 
 7206   ins_encode %{
 7207     __ movslq($dst$$Register, $mem$$Address);
 7208   %}
 7209 
 7210   ins_pipe(ialu_reg_mem);
 7211 %}
 7212 
 7213 // Load Integer with mask 0xFF into Long Register
 7214 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7215   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7216 
 7217   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7218   ins_encode %{
 7219     __ movzbq($dst$$Register, $mem$$Address);
 7220   %}
 7221   ins_pipe(ialu_reg_mem);
 7222 %}
 7223 
 7224 // Load Integer with mask 0xFFFF into Long Register
 7225 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7226   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7227 
 7228   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7229   ins_encode %{
 7230     __ movzwq($dst$$Register, $mem$$Address);
 7231   %}
 7232   ins_pipe(ialu_reg_mem);
 7233 %}
 7234 
 7235 // Load Integer with a 31-bit mask into Long Register
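// Because the mask's sign bit is clear, the 32-bit and produces a non-negative
// result whose implicit zero-extension equals the required sign-extension, so
// the ConvI2L needs no separate sign-extending move.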
 7236 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7237   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7238   effect(KILL cr);
 7239 
 7240   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7241             "andl    $dst, $mask" %}
 7242   ins_encode %{
 7243     Register Rdst = $dst$$Register;
 7244     __ movl(Rdst, $mem$$Address);
 7245     __ andl(Rdst, $mask$$constant);
 7246   %}
 7247   ins_pipe(ialu_reg_mem);
 7248 %}
 7249 
 7250 // Load Unsigned Integer into Long Register
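// A 32-bit load zero-extends into the full 64-bit register, so the explicit
// AndL with the 0xFFFFFFFF mask comes for free.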
 7251 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7252 %{
 7253   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7254 
 7255   ins_cost(125);
 7256   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7257 
 7258   ins_encode %{
 7259     __ movl($dst$$Register, $mem$$Address);
 7260   %}
 7261 
 7262   ins_pipe(ialu_reg_mem);
 7263 %}
 7264 
 7265 // Load Long
 7266 instruct loadL(rRegL dst, memory mem)
 7267 %{
 7268   match(Set dst (LoadL mem));
 7269 
 7270   ins_cost(125);
 7271   format %{ "movq    $dst, $mem\t# long" %}
 7272 
 7273   ins_encode %{
 7274     __ movq($dst$$Register, $mem$$Address);
 7275   %}
 7276 
 7277   ins_pipe(ialu_reg_mem); // XXX
 7278 %}
 7279 
 7280 // Load Range
 7281 instruct loadRange(rRegI dst, memory mem)
 7282 %{
 7283   match(Set dst (LoadRange mem));
 7284 
 7285   ins_cost(125); // XXX
 7286   format %{ "movl    $dst, $mem\t# range" %}
 7287   ins_encode %{
 7288     __ movl($dst$$Register, $mem$$Address);
 7289   %}
 7290   ins_pipe(ialu_reg_mem);
 7291 %}
 7292 
 7293 // Load Pointer
 7294 instruct loadP(rRegP dst, memory mem)
 7295 %{
 7296   match(Set dst (LoadP mem));
 7297   predicate(n->as_Load()->barrier_data() == 0);
 7298 
 7299   ins_cost(125); // XXX
 7300   format %{ "movq    $dst, $mem\t# ptr" %}
 7301   ins_encode %{
 7302     __ movq($dst$$Register, $mem$$Address);
 7303   %}
 7304   ins_pipe(ialu_reg_mem); // XXX
 7305 %}
 7306 
 7307 // Load Compressed Pointer
 7308 instruct loadN(rRegN dst, memory mem)
 7309 %{
 7310    predicate(n->as_Load()->barrier_data() == 0);
 7311    match(Set dst (LoadN mem));
 7312 
 7313    ins_cost(125); // XXX
 7314    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7315    ins_encode %{
 7316      __ movl($dst$$Register, $mem$$Address);
 7317    %}
 7318    ins_pipe(ialu_reg_mem); // XXX
 7319 %}
 7320 
 7321 
 7322 // Load Klass Pointer
 7323 instruct loadKlass(rRegP dst, memory mem)
 7324 %{
 7325   match(Set dst (LoadKlass mem));
 7326 
 7327   ins_cost(125); // XXX
 7328   format %{ "movq    $dst, $mem\t# class" %}
 7329   ins_encode %{
 7330     __ movq($dst$$Register, $mem$$Address);
 7331   %}
 7332   ins_pipe(ialu_reg_mem); // XXX
 7333 %}
 7334 
 7335 // Load narrow Klass Pointer
 7336 instruct loadNKlass(rRegN dst, memory mem)
 7337 %{
 7338   predicate(!UseCompactObjectHeaders);
 7339   match(Set dst (LoadNKlass mem));
 7340 
 7341   ins_cost(125); // XXX
 7342   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7343   ins_encode %{
 7344     __ movl($dst$$Register, $mem$$Address);
 7345   %}
 7346   ins_pipe(ialu_reg_mem); // XXX
 7347 %}
 7348 
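// With compact object headers the narrow klass pointer is packed into the
// upper bits of the object's mark word, so it must be shifted down after the
// load; with APX the load and shift fuse into a single eshrl from memory.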
 7349 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7350 %{
 7351   predicate(UseCompactObjectHeaders);
 7352   match(Set dst (LoadNKlass mem));
 7353   effect(KILL cr);
 7354   ins_cost(125);
 7355   format %{
 7356     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7357     "shrl    $dst, markWord::klass_shift_at_offset"
 7358   %}
 7359   ins_encode %{
 7360     if (UseAPX) {
 7361       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7362     } else {
 7364       __ movl($dst$$Register, $mem$$Address);
 7365       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7366     }
 7367   %}
 7368   ins_pipe(ialu_reg_mem);
 7369 %}
 7370 
 7371 // Load Float
 7372 instruct loadF(regF dst, memory mem)
 7373 %{
 7374   match(Set dst (LoadF mem));
 7375 
 7376   ins_cost(145); // XXX
 7377   format %{ "movss   $dst, $mem\t# float" %}
 7378   ins_encode %{
 7379     __ movflt($dst$$XMMRegister, $mem$$Address);
 7380   %}
 7381   ins_pipe(pipe_slow); // XXX
 7382 %}
 7383 
 7384 // Load Double
 7385 instruct loadD_partial(regD dst, memory mem)
 7386 %{
 7387   predicate(!UseXmmLoadAndClearUpper);
 7388   match(Set dst (LoadD mem));
 7389 
 7390   ins_cost(145); // XXX
 7391   format %{ "movlpd  $dst, $mem\t# double" %}
 7392   ins_encode %{
 7393     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7394   %}
 7395   ins_pipe(pipe_slow); // XXX
 7396 %}
 7397 
 7398 instruct loadD(regD dst, memory mem)
 7399 %{
 7400   predicate(UseXmmLoadAndClearUpper);
 7401   match(Set dst (LoadD mem));
 7402 
 7403   ins_cost(145); // XXX
 7404   format %{ "movsd   $dst, $mem\t# double" %}
 7405   ins_encode %{
 7406     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7407   %}
 7408   ins_pipe(pipe_slow); // XXX
 7409 %}
 7410 
 7411 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7412 %{
 7413   match(Set dst con);
 7414 
 7415   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7416 
 7417   ins_encode %{
 7418     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7419   %}
 7420 
 7421   ins_pipe(ialu_reg_fat);
 7422 %}
 7423 
 7424 // min = java.lang.Math.min(float a, float b)
 7425 // max = java.lang.Math.max(float a, float b)
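// Math.min/max semantics differ from the raw SSE min/max for NaN and for
// +/-0.0, so these rules go through helper sequences (sminmax_fp_avx10_2,
// vminmax_fp, emit_fp_min_max) instead of a bare minss/maxss.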
 7426 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
 7427 %{
 7428   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7429   match(Set dst (MaxF a b));
 7430   match(Set dst (MinF a b));
 7431 
 7432   format %{ "minmaxF $dst, $a, $b" %}
 7433   ins_encode %{
 7434     int opcode = this->ideal_Opcode();
 7435     __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7436   %}
 7437   ins_pipe( pipe_slow );
 7438 %}
 7439 
 7440 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
 7441 %{
 7442   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7443   match(Set dst (MaxF a b));
 7444   match(Set dst (MinF a b));
 7445   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7446 
 7447   format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7448   ins_encode %{
 7449     int opcode = this->ideal_Opcode();
 7450     bool min = (opcode == Op_MinF);
 7451     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7452                     min, fp_prec_flt /*pt*/);
 7453   %}
 7454   ins_pipe( pipe_slow );
 7455 %}
 7456 
 7457 // min = java.lang.Math.min(float a, float b)
 7458 // max = java.lang.Math.max(float a, float b)
 7459 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
 7460 %{
 7461   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7462   match(Set dst (MaxF a b));
 7463   match(Set dst (MinF a b));
 7464   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7465 
 7466   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7467   ins_encode %{
 7468     int opcode = this->ideal_Opcode();
 7469     int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
 7470     __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7471                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7472   %}
 7473   ins_pipe( pipe_slow );
 7474 %}
 7475 
 7476 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
 7477 %{
 7478   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7479   match(Set dst (MaxF a b));
 7480   match(Set dst (MinF a b));
 7481   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7482 
 7483   format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7484   ins_encode %{
 7485     int opcode = this->ideal_Opcode();
 7486     bool min = (opcode == Op_MinF);
 7487     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7488                     min, fp_prec_flt /*pt*/);
 7489   %}
 7490   ins_pipe( pipe_slow );
 7491 %}
 7492 
 7493 // min = java.lang.Math.min(double a, double b)
 7494 // max = java.lang.Math.max(double a, double b)
 7495 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
 7496 %{
 7497   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7498   match(Set dst (MaxD a b));
 7499   match(Set dst (MinD a b));
 7500 
 7501   format %{ "minmaxD $dst, $a, $b" %}
 7502   ins_encode %{
 7503     int opcode = this->ideal_Opcode();
 7504     __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7505   %}
 7506   ins_pipe( pipe_slow );
 7507 %}
 7508 
 7509 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
 7510 %{
 7511   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7512   match(Set dst (MaxD a b));
 7513   match(Set dst (MinD a b));
 7514   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7515 
 7516   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7517   ins_encode %{
 7518     int opcode = this->ideal_Opcode();
 7519     bool min = (opcode == Op_MinD);
 7520     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7521                     min, fp_prec_dbl /*pt*/);
 7522   %}
 7523   ins_pipe( pipe_slow );
 7524 %}
 7525 
 7526 // min = java.lang.Math.min(double a, double b)
 7527 // max = java.lang.Math.max(double a, double b)
 7528 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
 7529 %{
 7530   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7531   match(Set dst (MaxD a b));
 7532   match(Set dst (MinD a b));
 7533   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7534 
 7535   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7536   ins_encode %{
 7537     int opcode = this->ideal_Opcode();
 7538     int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
 7539     __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7540                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7541   %}
 7542   ins_pipe( pipe_slow );
 7543 %}
 7544 
 7545 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
 7546 %{
 7547   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7548   match(Set dst (MaxD a b));
 7549   match(Set dst (MinD a b));
 7550   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7551 
 7552   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7553   ins_encode %{
 7554     int opcode = this->ideal_Opcode();
 7555     bool min = (opcode == Op_MinD);
 7556     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7557                     min, fp_prec_dbl /*pt*/);
 7558   %}
 7559   ins_pipe( pipe_slow );
 7560 %}
 7561 
 7562 // Load Effective Address
 7563 instruct leaP8(rRegP dst, indOffset8 mem)
 7564 %{
 7565   match(Set dst mem);
 7566 
 7567   ins_cost(110); // XXX
 7568   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7569   ins_encode %{
 7570     __ leaq($dst$$Register, $mem$$Address);
 7571   %}
 7572   ins_pipe(ialu_reg_reg_fat);
 7573 %}
 7574 
 7575 instruct leaP32(rRegP dst, indOffset32 mem)
 7576 %{
 7577   match(Set dst mem);
 7578 
 7579   ins_cost(110);
 7580   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7581   ins_encode %{
 7582     __ leaq($dst$$Register, $mem$$Address);
 7583   %}
 7584   ins_pipe(ialu_reg_reg_fat);
 7585 %}
 7586 
 7587 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7588 %{
 7589   match(Set dst mem);
 7590 
 7591   ins_cost(110);
 7592   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7593   ins_encode %{
 7594     __ leaq($dst$$Register, $mem$$Address);
 7595   %}
 7596   ins_pipe(ialu_reg_reg_fat);
 7597 %}
 7598 
 7599 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7600 %{
 7601   match(Set dst mem);
 7602 
 7603   ins_cost(110);
 7604   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7605   ins_encode %{
 7606     __ leaq($dst$$Register, $mem$$Address);
 7607   %}
 7608   ins_pipe(ialu_reg_reg_fat);
 7609 %}
 7610 
 7611 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7612 %{
 7613   match(Set dst mem);
 7614 
 7615   ins_cost(110);
 7616   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7617   ins_encode %{
 7618     __ leaq($dst$$Register, $mem$$Address);
 7619   %}
 7620   ins_pipe(ialu_reg_reg_fat);
 7621 %}
 7622 
 7623 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7624 %{
 7625   match(Set dst mem);
 7626 
 7627   ins_cost(110);
 7628   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7629   ins_encode %{
 7630     __ leaq($dst$$Register, $mem$$Address);
 7631   %}
 7632   ins_pipe(ialu_reg_reg_fat);
 7633 %}
 7634 
 7635 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7636 %{
 7637   match(Set dst mem);
 7638 
 7639   ins_cost(110);
 7640   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7641   ins_encode %{
 7642     __ leaq($dst$$Register, $mem$$Address);
 7643   %}
 7644   ins_pipe(ialu_reg_reg_fat);
 7645 %}
 7646 
 7647 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7648 %{
 7649   match(Set dst mem);
 7650 
 7651   ins_cost(110);
 7652   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7653   ins_encode %{
 7654     __ leaq($dst$$Register, $mem$$Address);
 7655   %}
 7656   ins_pipe(ialu_reg_reg_fat);
 7657 %}
 7658 
 7659 // Load Effective Address which uses Narrow (32-bits) oop
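// Roughly, this form folds the narrow-oop decode (heap base plus shifted
// narrow oop) into the lea addressing mode; the *Narrow variants below apply
// only when the compressed-oop shift is zero, so the 32-bit oop value can be
// used directly as an address base.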
 7660 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7661 %{
 7662   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7663   match(Set dst mem);
 7664 
 7665   ins_cost(110);
 7666   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7667   ins_encode %{
 7668     __ leaq($dst$$Register, $mem$$Address);
 7669   %}
 7670   ins_pipe(ialu_reg_reg_fat);
 7671 %}
 7672 
 7673 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7674 %{
 7675   predicate(CompressedOops::shift() == 0);
 7676   match(Set dst mem);
 7677 
 7678   ins_cost(110); // XXX
 7679   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7680   ins_encode %{
 7681     __ leaq($dst$$Register, $mem$$Address);
 7682   %}
 7683   ins_pipe(ialu_reg_reg_fat);
 7684 %}
 7685 
 7686 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7687 %{
 7688   predicate(CompressedOops::shift() == 0);
 7689   match(Set dst mem);
 7690 
 7691   ins_cost(110);
 7692   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7693   ins_encode %{
 7694     __ leaq($dst$$Register, $mem$$Address);
 7695   %}
 7696   ins_pipe(ialu_reg_reg_fat);
 7697 %}
 7698 
 7699 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7700 %{
 7701   predicate(CompressedOops::shift() == 0);
 7702   match(Set dst mem);
 7703 
 7704   ins_cost(110);
 7705   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7706   ins_encode %{
 7707     __ leaq($dst$$Register, $mem$$Address);
 7708   %}
 7709   ins_pipe(ialu_reg_reg_fat);
 7710 %}
 7711 
 7712 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7713 %{
 7714   predicate(CompressedOops::shift() == 0);
 7715   match(Set dst mem);
 7716 
 7717   ins_cost(110);
 7718   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7719   ins_encode %{
 7720     __ leaq($dst$$Register, $mem$$Address);
 7721   %}
 7722   ins_pipe(ialu_reg_reg_fat);
 7723 %}
 7724 
 7725 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7726 %{
 7727   predicate(CompressedOops::shift() == 0);
 7728   match(Set dst mem);
 7729 
 7730   ins_cost(110);
 7731   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7732   ins_encode %{
 7733     __ leaq($dst$$Register, $mem$$Address);
 7734   %}
 7735   ins_pipe(ialu_reg_reg_fat);
 7736 %}
 7737 
 7738 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7739 %{
 7740   predicate(CompressedOops::shift() == 0);
 7741   match(Set dst mem);
 7742 
 7743   ins_cost(110);
 7744   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7745   ins_encode %{
 7746     __ leaq($dst$$Register, $mem$$Address);
 7747   %}
 7748   ins_pipe(ialu_reg_reg_fat);
 7749 %}
 7750 
 7751 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7752 %{
 7753   predicate(CompressedOops::shift() == 0);
 7754   match(Set dst mem);
 7755 
 7756   ins_cost(110);
 7757   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7758   ins_encode %{
 7759     __ leaq($dst$$Register, $mem$$Address);
 7760   %}
 7761   ins_pipe(ialu_reg_reg_fat);
 7762 %}
 7763 
 7764 instruct loadConI(rRegI dst, immI src)
 7765 %{
 7766   match(Set dst src);
 7767 
 7768   format %{ "movl    $dst, $src\t# int" %}
 7769   ins_encode %{
 7770     __ movl($dst$$Register, $src$$constant);
 7771   %}
 7772   ins_pipe(ialu_reg_fat); // XXX
 7773 %}
 7774 
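// Zeroing with xor is shorter than moving an immediate and breaks the
// dependency on the old register value; it clobbers the flags, hence KILL cr.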
 7775 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7776 %{
 7777   match(Set dst src);
 7778   effect(KILL cr);
 7779 
 7780   ins_cost(50);
 7781   format %{ "xorl    $dst, $dst\t# int" %}
 7782   ins_encode %{
 7783     __ xorl($dst$$Register, $dst$$Register);
 7784   %}
 7785   ins_pipe(ialu_reg);
 7786 %}
 7787 
 7788 instruct loadConL(rRegL dst, immL src)
 7789 %{
 7790   match(Set dst src);
 7791 
 7792   ins_cost(150);
 7793   format %{ "movq    $dst, $src\t# long" %}
 7794   ins_encode %{
 7795     __ mov64($dst$$Register, $src$$constant);
 7796   %}
 7797   ins_pipe(ialu_reg);
 7798 %}
 7799 
 7800 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7801 %{
 7802   match(Set dst src);
 7803   effect(KILL cr);
 7804 
 7805   ins_cost(50);
 7806   format %{ "xorl    $dst, $dst\t# long" %}
 7807   ins_encode %{
 7808     __ xorl($dst$$Register, $dst$$Register);
 7809   %}
 7810   ins_pipe(ialu_reg); // XXX
 7811 %}
 7812 
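// A 32-bit move of the constant zero-extends into the 64-bit register, so an
// unsigned 32-bit long constant avoids the longer 64-bit immediate encoding.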
 7813 instruct loadConUL32(rRegL dst, immUL32 src)
 7814 %{
 7815   match(Set dst src);
 7816 
 7817   ins_cost(60);
 7818   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7819   ins_encode %{
 7820     __ movl($dst$$Register, $src$$constant);
 7821   %}
 7822   ins_pipe(ialu_reg);
 7823 %}
 7824 
 7825 instruct loadConL32(rRegL dst, immL32 src)
 7826 %{
 7827   match(Set dst src);
 7828 
 7829   ins_cost(70);
 7830   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7831   ins_encode %{
 7832     __ movq($dst$$Register, $src$$constant);
 7833   %}
 7834   ins_pipe(ialu_reg);
 7835 %}
 7836 
 7837 instruct loadConP(rRegP dst, immP con) %{
 7838   match(Set dst con);
 7839 
 7840   format %{ "movq    $dst, $con\t# ptr" %}
 7841   ins_encode %{
 7842     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7843   %}
 7844   ins_pipe(ialu_reg_fat); // XXX
 7845 %}
 7846 
 7847 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7848 %{
 7849   match(Set dst src);
 7850   effect(KILL cr);
 7851 
 7852   ins_cost(50);
 7853   format %{ "xorl    $dst, $dst\t# ptr" %}
 7854   ins_encode %{
 7855     __ xorl($dst$$Register, $dst$$Register);
 7856   %}
 7857   ins_pipe(ialu_reg);
 7858 %}
 7859 
 7860 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7861 %{
 7862   match(Set dst src);
 7863   effect(KILL cr);
 7864 
 7865   ins_cost(60);
 7866   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7867   ins_encode %{
 7868     __ movl($dst$$Register, $src$$constant);
 7869   %}
 7870   ins_pipe(ialu_reg);
 7871 %}
 7872 
 7873 instruct loadConF(regF dst, immF con) %{
 7874   match(Set dst con);
 7875   ins_cost(125);
 7876   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7877   ins_encode %{
 7878     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7879   %}
 7880   ins_pipe(pipe_slow);
 7881 %}
 7882 
 7883 instruct loadConH(regF dst, immH con) %{
 7884   match(Set dst con);
 7885   ins_cost(125);
 7886   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7887   ins_encode %{
 7888     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7889   %}
 7890   ins_pipe(pipe_slow);
 7891 %}
 7892 
 7893 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7894   match(Set dst src);
 7895   effect(KILL cr);
 7896   format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7897   ins_encode %{
 7898     __ xorq($dst$$Register, $dst$$Register);
 7899   %}
 7900   ins_pipe(ialu_reg);
 7901 %}
 7902 
 7903 instruct loadConN(rRegN dst, immN src) %{
 7904   match(Set dst src);
 7905 
 7906   ins_cost(125);
 7907   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7908   ins_encode %{
 7909     address con = (address)$src$$constant;
 7910     if (con == nullptr) {
 7911       ShouldNotReachHere();
 7912     } else {
 7913       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7914     }
 7915   %}
 7916   ins_pipe(ialu_reg_fat); // XXX
 7917 %}
 7918 
 7919 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7920   match(Set dst src);
 7921 
 7922   ins_cost(125);
 7923   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7924   ins_encode %{
 7925     address con = (address)$src$$constant;
 7926     if (con == nullptr) {
 7927       ShouldNotReachHere();
 7928     } else {
 7929       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7930     }
 7931   %}
 7932   ins_pipe(ialu_reg_fat); // XXX
 7933 %}
 7934 
 7935 instruct loadConF0(regF dst, immF0 src)
 7936 %{
 7937   match(Set dst src);
 7938   ins_cost(100);
 7939 
 7940   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7941   ins_encode %{
 7942     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7943   %}
 7944   ins_pipe(pipe_slow);
 7945 %}
 7946 
 7947 // Use the same format since predicate() cannot be used here.
 7948 instruct loadConD(regD dst, immD con) %{
 7949   match(Set dst con);
 7950   ins_cost(125);
 7951   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7952   ins_encode %{
 7953     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7954   %}
 7955   ins_pipe(pipe_slow);
 7956 %}
 7957 
 7958 instruct loadConD0(regD dst, immD0 src)
 7959 %{
 7960   match(Set dst src);
 7961   ins_cost(100);
 7962 
 7963   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7964   ins_encode %{
 7965     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7966   %}
 7967   ins_pipe(pipe_slow);
 7968 %}
 7969 
 7970 instruct loadSSI(rRegI dst, stackSlotI src)
 7971 %{
 7972   match(Set dst src);
 7973 
 7974   ins_cost(125);
 7975   format %{ "movl    $dst, $src\t# int stk" %}
 7976   ins_encode %{
 7977     __ movl($dst$$Register, $src$$Address);
 7978   %}
 7979   ins_pipe(ialu_reg_mem);
 7980 %}
 7981 
 7982 instruct loadSSL(rRegL dst, stackSlotL src)
 7983 %{
 7984   match(Set dst src);
 7985 
 7986   ins_cost(125);
 7987   format %{ "movq    $dst, $src\t# long stk" %}
 7988   ins_encode %{
 7989     __ movq($dst$$Register, $src$$Address);
 7990   %}
 7991   ins_pipe(ialu_reg_mem);
 7992 %}
 7993 
 7994 instruct loadSSP(rRegP dst, stackSlotP src)
 7995 %{
 7996   match(Set dst src);
 7997 
 7998   ins_cost(125);
 7999   format %{ "movq    $dst, $src\t# ptr stk" %}
 8000   ins_encode %{
 8001     __ movq($dst$$Register, $src$$Address);
 8002   %}
 8003   ins_pipe(ialu_reg_mem);
 8004 %}
 8005 
 8006 instruct loadSSF(regF dst, stackSlotF src)
 8007 %{
 8008   match(Set dst src);
 8009 
 8010   ins_cost(125);
 8011   format %{ "movss   $dst, $src\t# float stk" %}
 8012   ins_encode %{
 8013     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 8014   %}
 8015   ins_pipe(pipe_slow); // XXX
 8016 %}
 8017 
 8018 // Use the same format since predicate() cannot be used here.
 8019 instruct loadSSD(regD dst, stackSlotD src)
 8020 %{
 8021   match(Set dst src);
 8022 
 8023   ins_cost(125);
 8024   format %{ "movsd   $dst, $src\t# double stk" %}
 8025   ins_encode  %{
 8026     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 8027   %}
 8028   ins_pipe(pipe_slow); // XXX
 8029 %}
 8030 
 8031 // Prefetch instructions for allocation.
 8032 // Must be safe to execute with invalid address (cannot fault).
 8033 
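// AllocatePrefetchInstr selects the variant: 0 = prefetchnta, 1 = prefetcht0,
// 2 = prefetcht2, 3 = prefetchw.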
 8034 instruct prefetchAlloc( memory mem ) %{
 8035   predicate(AllocatePrefetchInstr==3);
 8036   match(PrefetchAllocation mem);
 8037   ins_cost(125);
 8038 
 8039   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 8040   ins_encode %{
 8041     __ prefetchw($mem$$Address);
 8042   %}
 8043   ins_pipe(ialu_mem);
 8044 %}
 8045 
 8046 instruct prefetchAllocNTA( memory mem ) %{
 8047   predicate(AllocatePrefetchInstr==0);
 8048   match(PrefetchAllocation mem);
 8049   ins_cost(125);
 8050 
 8051   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 8052   ins_encode %{
 8053     __ prefetchnta($mem$$Address);
 8054   %}
 8055   ins_pipe(ialu_mem);
 8056 %}
 8057 
 8058 instruct prefetchAllocT0( memory mem ) %{
 8059   predicate(AllocatePrefetchInstr==1);
 8060   match(PrefetchAllocation mem);
 8061   ins_cost(125);
 8062 
 8063   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8064   ins_encode %{
 8065     __ prefetcht0($mem$$Address);
 8066   %}
 8067   ins_pipe(ialu_mem);
 8068 %}
 8069 
 8070 instruct prefetchAllocT2( memory mem ) %{
 8071   predicate(AllocatePrefetchInstr==2);
 8072   match(PrefetchAllocation mem);
 8073   ins_cost(125);
 8074 
 8075   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8076   ins_encode %{
 8077     __ prefetcht2($mem$$Address);
 8078   %}
 8079   ins_pipe(ialu_mem);
 8080 %}
 8081 
 8082 //----------Store Instructions-------------------------------------------------
 8083 
 8084 // Store Byte
 8085 instruct storeB(memory mem, rRegI src)
 8086 %{
 8087   match(Set mem (StoreB mem src));
 8088 
 8089   ins_cost(125); // XXX
 8090   format %{ "movb    $mem, $src\t# byte" %}
 8091   ins_encode %{
 8092     __ movb($mem$$Address, $src$$Register);
 8093   %}
 8094   ins_pipe(ialu_mem_reg);
 8095 %}
 8096 
 8097 // Store Char/Short
 8098 instruct storeC(memory mem, rRegI src)
 8099 %{
 8100   match(Set mem (StoreC mem src));
 8101 
 8102   ins_cost(125); // XXX
 8103   format %{ "movw    $mem, $src\t# char/short" %}
 8104   ins_encode %{
 8105     __ movw($mem$$Address, $src$$Register);
 8106   %}
 8107   ins_pipe(ialu_mem_reg);
 8108 %}
 8109 
 8110 // Store Integer
 8111 instruct storeI(memory mem, rRegI src)
 8112 %{
 8113   match(Set mem (StoreI mem src));
 8114 
 8115   ins_cost(125); // XXX
 8116   format %{ "movl    $mem, $src\t# int" %}
 8117   ins_encode %{
 8118     __ movl($mem$$Address, $src$$Register);
 8119   %}
 8120   ins_pipe(ialu_mem_reg);
 8121 %}
 8122 
 8123 // Store Long
 8124 instruct storeL(memory mem, rRegL src)
 8125 %{
 8126   match(Set mem (StoreL mem src));
 8127 
 8128   ins_cost(125); // XXX
 8129   format %{ "movq    $mem, $src\t# long" %}
 8130   ins_encode %{
 8131     __ movq($mem$$Address, $src$$Register);
 8132   %}
 8133   ins_pipe(ialu_mem_reg); // XXX
 8134 %}
 8135 
 8136 // Store Pointer
 8137 instruct storeP(memory mem, any_RegP src)
 8138 %{
 8139   predicate(n->as_Store()->barrier_data() == 0);
 8140   match(Set mem (StoreP mem src));
 8141 
 8142   ins_cost(125); // XXX
 8143   format %{ "movq    $mem, $src\t# ptr" %}
 8144   ins_encode %{
 8145     __ movq($mem$$Address, $src$$Register);
 8146   %}
 8147   ins_pipe(ialu_mem_reg);
 8148 %}
 8149 
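// With zero-based compressed oops R12 permanently holds the heap base, i.e.
// zero, so the immediate-zero store rules below store R12 to write a
// zero/null without needing an immediate operand.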
 8150 instruct storeImmP0(memory mem, immP0 zero)
 8151 %{
 8152   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8153   match(Set mem (StoreP mem zero));
 8154 
 8155   ins_cost(125); // XXX
 8156   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8157   ins_encode %{
 8158     __ movq($mem$$Address, r12);
 8159   %}
 8160   ins_pipe(ialu_mem_reg);
 8161 %}
 8162 
 8163 // Store Null Pointer, mark word, or other simple pointer constant.
 8164 instruct storeImmP(memory mem, immP31 src)
 8165 %{
 8166   predicate(n->as_Store()->barrier_data() == 0);
 8167   match(Set mem (StoreP mem src));
 8168 
 8169   ins_cost(150); // XXX
 8170   format %{ "movq    $mem, $src\t# ptr" %}
 8171   ins_encode %{
 8172     __ movq($mem$$Address, $src$$constant);
 8173   %}
 8174   ins_pipe(ialu_mem_imm);
 8175 %}
 8176 
 8177 // Store Compressed Pointer
 8178 instruct storeN(memory mem, rRegN src)
 8179 %{
 8180   predicate(n->as_Store()->barrier_data() == 0);
 8181   match(Set mem (StoreN mem src));
 8182 
 8183   ins_cost(125); // XXX
 8184   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8185   ins_encode %{
 8186     __ movl($mem$$Address, $src$$Register);
 8187   %}
 8188   ins_pipe(ialu_mem_reg);
 8189 %}
 8190 
 8191 instruct storeNKlass(memory mem, rRegN src)
 8192 %{
 8193   match(Set mem (StoreNKlass mem src));
 8194 
 8195   ins_cost(125); // XXX
 8196   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8197   ins_encode %{
 8198     __ movl($mem$$Address, $src$$Register);
 8199   %}
 8200   ins_pipe(ialu_mem_reg);
 8201 %}
 8202 
 8203 instruct storeImmN0(memory mem, immN0 zero)
 8204 %{
 8205   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8206   match(Set mem (StoreN mem zero));
 8207 
 8208   ins_cost(125); // XXX
 8209   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8210   ins_encode %{
 8211     __ movl($mem$$Address, r12);
 8212   %}
 8213   ins_pipe(ialu_mem_reg);
 8214 %}
 8215 
 8216 instruct storeImmN(memory mem, immN src)
 8217 %{
 8218   predicate(n->as_Store()->barrier_data() == 0);
 8219   match(Set mem (StoreN mem src));
 8220 
 8221   ins_cost(150); // XXX
 8222   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8223   ins_encode %{
 8224     address con = (address)$src$$constant;
 8225     if (con == nullptr) {
 8226       __ movl($mem$$Address, 0);
 8227     } else {
 8228       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8229     }
 8230   %}
 8231   ins_pipe(ialu_mem_imm);
 8232 %}
 8233 
 8234 instruct storeImmNKlass(memory mem, immNKlass src)
 8235 %{
 8236   match(Set mem (StoreNKlass mem src));
 8237 
 8238   ins_cost(150); // XXX
 8239   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8240   ins_encode %{
 8241     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8242   %}
 8243   ins_pipe(ialu_mem_imm);
 8244 %}
 8245 
 8246 // Store Integer Immediate
 8247 instruct storeImmI0(memory mem, immI_0 zero)
 8248 %{
 8249   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8250   match(Set mem (StoreI mem zero));
 8251 
 8252   ins_cost(125); // XXX
 8253   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8254   ins_encode %{
 8255     __ movl($mem$$Address, r12);
 8256   %}
 8257   ins_pipe(ialu_mem_reg);
 8258 %}
 8259 
 8260 instruct storeImmI(memory mem, immI src)
 8261 %{
 8262   match(Set mem (StoreI mem src));
 8263 
 8264   ins_cost(150);
 8265   format %{ "movl    $mem, $src\t# int" %}
 8266   ins_encode %{
 8267     __ movl($mem$$Address, $src$$constant);
 8268   %}
 8269   ins_pipe(ialu_mem_imm);
 8270 %}
 8271 
 8272 // Store Long Immediate
 8273 instruct storeImmL0(memory mem, immL0 zero)
 8274 %{
 8275   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8276   match(Set mem (StoreL mem zero));
 8277 
 8278   ins_cost(125); // XXX
 8279   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8280   ins_encode %{
 8281     __ movq($mem$$Address, r12);
 8282   %}
 8283   ins_pipe(ialu_mem_reg);
 8284 %}
 8285 
 8286 instruct storeImmL(memory mem, immL32 src)
 8287 %{
 8288   match(Set mem (StoreL mem src));
 8289 
 8290   ins_cost(150);
 8291   format %{ "movq    $mem, $src\t# long" %}
 8292   ins_encode %{
 8293     __ movq($mem$$Address, $src$$constant);
 8294   %}
 8295   ins_pipe(ialu_mem_imm);
 8296 %}
 8297 
 8298 // Store Short/Char Immediate
 8299 instruct storeImmC0(memory mem, immI_0 zero)
 8300 %{
 8301   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8302   match(Set mem (StoreC mem zero));
 8303 
 8304   ins_cost(125); // XXX
 8305   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8306   ins_encode %{
 8307     __ movw($mem$$Address, r12);
 8308   %}
 8309   ins_pipe(ialu_mem_reg);
 8310 %}
 8311 
 8312 instruct storeImmI16(memory mem, immI16 src)
 8313 %{
 8314   predicate(UseStoreImmI16);
 8315   match(Set mem (StoreC mem src));
 8316 
 8317   ins_cost(150);
 8318   format %{ "movw    $mem, $src\t# short/char" %}
 8319   ins_encode %{
 8320     __ movw($mem$$Address, $src$$constant);
 8321   %}
 8322   ins_pipe(ialu_mem_imm);
 8323 %}
 8324 
 8325 // Store Byte Immediate
 8326 instruct storeImmB0(memory mem, immI_0 zero)
 8327 %{
 8328   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8329   match(Set mem (StoreB mem zero));
 8330 
 8331   ins_cost(125); // XXX
 8332   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8333   ins_encode %{
 8334     __ movb($mem$$Address, r12);
 8335   %}
 8336   ins_pipe(ialu_mem_reg);
 8337 %}
 8338 
 8339 instruct storeImmB(memory mem, immI8 src)
 8340 %{
 8341   match(Set mem (StoreB mem src));
 8342 
 8343   ins_cost(150); // XXX
 8344   format %{ "movb    $mem, $src\t# byte" %}
 8345   ins_encode %{
 8346     __ movb($mem$$Address, $src$$constant);
 8347   %}
 8348   ins_pipe(ialu_mem_imm);
 8349 %}
 8350 
 8351 // Store Float
 8352 instruct storeF(memory mem, regF src)
 8353 %{
 8354   match(Set mem (StoreF mem src));
 8355 
 8356   ins_cost(95); // XXX
 8357   format %{ "movss   $mem, $src\t# float" %}
 8358   ins_encode %{
 8359     __ movflt($mem$$Address, $src$$XMMRegister);
 8360   %}
 8361   ins_pipe(pipe_slow); // XXX
 8362 %}
 8363 
 8364 // Store immediate Float value (it is faster than store from XMM register)
 8365 instruct storeF0(memory mem, immF0 zero)
 8366 %{
 8367   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8368   match(Set mem (StoreF mem zero));
 8369 
 8370   ins_cost(25); // XXX
 8371   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8372   ins_encode %{
 8373     __ movl($mem$$Address, r12);
 8374   %}
 8375   ins_pipe(ialu_mem_reg);
 8376 %}
 8377 
 8378 instruct storeF_imm(memory mem, immF src)
 8379 %{
 8380   match(Set mem (StoreF mem src));
 8381 
 8382   ins_cost(50);
 8383   format %{ "movl    $mem, $src\t# float" %}
 8384   ins_encode %{
 8385     __ movl($mem$$Address, jint_cast($src$$constant));
 8386   %}
 8387   ins_pipe(ialu_mem_imm);
 8388 %}
 8389 
 8390 // Store Double
 8391 instruct storeD(memory mem, regD src)
 8392 %{
 8393   match(Set mem (StoreD mem src));
 8394 
 8395   ins_cost(95); // XXX
 8396   format %{ "movsd   $mem, $src\t# double" %}
 8397   ins_encode %{
 8398     __ movdbl($mem$$Address, $src$$XMMRegister);
 8399   %}
 8400   ins_pipe(pipe_slow); // XXX
 8401 %}
 8402 
 8403 // Store immediate double 0.0 (it is faster than store from XMM register)
 8404 instruct storeD0_imm(memory mem, immD0 src)
 8405 %{
 8406   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8407   match(Set mem (StoreD mem src));
 8408 
 8409   ins_cost(50);
 8410   format %{ "movq    $mem, $src\t# double 0." %}
 8411   ins_encode %{
 8412     __ movq($mem$$Address, $src$$constant);
 8413   %}
 8414   ins_pipe(ialu_mem_imm);
 8415 %}
 8416 
 8417 instruct storeD0(memory mem, immD0 zero)
 8418 %{
 8419   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8420   match(Set mem (StoreD mem zero));
 8421 
 8422   ins_cost(25); // XXX
 8423   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8424   ins_encode %{
 8425     __ movq($mem$$Address, r12);
 8426   %}
 8427   ins_pipe(ialu_mem_reg);
 8428 %}
 8429 
 8430 instruct storeSSI(stackSlotI dst, rRegI src)
 8431 %{
 8432   match(Set dst src);
 8433 
 8434   ins_cost(100);
 8435   format %{ "movl    $dst, $src\t# int stk" %}
 8436   ins_encode %{
 8437     __ movl($dst$$Address, $src$$Register);
 8438   %}
 8439   ins_pipe( ialu_mem_reg );
 8440 %}
 8441 
 8442 instruct storeSSL(stackSlotL dst, rRegL src)
 8443 %{
 8444   match(Set dst src);
 8445 
 8446   ins_cost(100);
 8447   format %{ "movq    $dst, $src\t# long stk" %}
 8448   ins_encode %{
 8449     __ movq($dst$$Address, $src$$Register);
 8450   %}
 8451   ins_pipe(ialu_mem_reg);
 8452 %}
 8453 
 8454 instruct storeSSP(stackSlotP dst, rRegP src)
 8455 %{
 8456   match(Set dst src);
 8457 
 8458   ins_cost(100);
 8459   format %{ "movq    $dst, $src\t# ptr stk" %}
 8460   ins_encode %{
 8461     __ movq($dst$$Address, $src$$Register);
 8462   %}
 8463   ins_pipe(ialu_mem_reg);
 8464 %}
 8465 
 8466 instruct storeSSF(stackSlotF dst, regF src)
 8467 %{
 8468   match(Set dst src);
 8469 
 8470   ins_cost(95); // XXX
 8471   format %{ "movss   $dst, $src\t# float stk" %}
 8472   ins_encode %{
 8473     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8474   %}
 8475   ins_pipe(pipe_slow); // XXX
 8476 %}
 8477 
 8478 instruct storeSSD(stackSlotD dst, regD src)
 8479 %{
 8480   match(Set dst src);
 8481 
 8482   ins_cost(95); // XXX
 8483   format %{ "movsd   $dst, $src\t# double stk" %}
 8484   ins_encode %{
 8485     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8486   %}
 8487   ins_pipe(pipe_slow); // XXX
 8488 %}
 8489 
 8490 instruct cacheWB(indirect addr)
 8491 %{
 8492   predicate(VM_Version::supports_data_cache_line_flush());
 8493   match(CacheWB addr);
 8494 
 8495   ins_cost(100);
 8496   format %{"cache wb $addr" %}
 8497   ins_encode %{
 8498     assert($addr->index_position() < 0, "should be");
 8499     assert($addr$$disp == 0, "should be");
 8500     __ cache_wb(Address($addr$$base$$Register, 0));
 8501   %}
 8502   ins_pipe(pipe_slow); // XXX
 8503 %}
 8504 
 8505 instruct cacheWBPreSync()
 8506 %{
 8507   predicate(VM_Version::supports_data_cache_line_flush());
 8508   match(CacheWBPreSync);
 8509 
 8510   ins_cost(100);
 8511   format %{"cache wb presync" %}
 8512   ins_encode %{
 8513     __ cache_wbsync(true);
 8514   %}
 8515   ins_pipe(pipe_slow); // XXX
 8516 %}
 8517 
 8518 instruct cacheWBPostSync()
 8519 %{
 8520   predicate(VM_Version::supports_data_cache_line_flush());
 8521   match(CacheWBPostSync);
 8522 
 8523   ins_cost(100);
 8524   format %{"cache wb postsync" %}
 8525   ins_encode %{
 8526     __ cache_wbsync(false);
 8527   %}
 8528   ins_pipe(pipe_slow); // XXX
 8529 %}
 8530 
 8531 //----------BSWAP Instructions-------------------------------------------------
 8532 instruct bytes_reverse_int(rRegI dst) %{
 8533   match(Set dst (ReverseBytesI dst));
 8534 
 8535   format %{ "bswapl  $dst" %}
 8536   ins_encode %{
 8537     __ bswapl($dst$$Register);
 8538   %}
 8539   ins_pipe( ialu_reg );
 8540 %}
 8541 
 8542 instruct bytes_reverse_long(rRegL dst) %{
 8543   match(Set dst (ReverseBytesL dst));
 8544 
 8545   format %{ "bswapq  $dst" %}
 8546   ins_encode %{
 8547     __ bswapq($dst$$Register);
 8548   %}
 8549   ins_pipe( ialu_reg);
 8550 %}
 8551 
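// For 16-bit values bswapl leaves the swapped bytes in bits 16..31, so they
// are shifted back down: a logical shift for the unsigned form, an arithmetic
// shift for the signed form to restore sign extension.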
 8552 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8553   match(Set dst (ReverseBytesUS dst));
 8554   effect(KILL cr);
 8555 
 8556   format %{ "bswapl  $dst\n\t"
 8557             "shrl    $dst,16\n\t" %}
 8558   ins_encode %{
 8559     __ bswapl($dst$$Register);
 8560     __ shrl($dst$$Register, 16);
 8561   %}
 8562   ins_pipe( ialu_reg );
 8563 %}
 8564 
 8565 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8566   match(Set dst (ReverseBytesS dst));
 8567   effect(KILL cr);
 8568 
 8569   format %{ "bswapl  $dst\n\t"
 8570             "sarl    $dst,16" %}
 8571   ins_encode %{
 8572     __ bswapl($dst$$Register);
 8573     __ sarl($dst$$Register, 16);
 8574   %}
 8575   ins_pipe( ialu_reg );
 8576 %}
 8577 
 8578 //---------- Zeros Count Instructions ------------------------------------------
 8579 
 8580 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8581   predicate(UseCountLeadingZerosInstruction);
 8582   match(Set dst (CountLeadingZerosI src));
 8583   effect(KILL cr);
 8584 
 8585   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8586   ins_encode %{
 8587     __ lzcntl($dst$$Register, $src$$Register);
 8588   %}
 8589   ins_pipe(ialu_reg);
 8590 %}
 8591 
 8592 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8593   predicate(UseCountLeadingZerosInstruction);
 8594   match(Set dst (CountLeadingZerosI (LoadI src)));
 8595   effect(KILL cr);
 8596   ins_cost(175);
 8597   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8598   ins_encode %{
 8599     __ lzcntl($dst$$Register, $src$$Address);
 8600   %}
 8601   ins_pipe(ialu_reg_mem);
 8602 %}
 8603 
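// Fallback when lzcnt is unavailable: bsr returns the index of the highest
// set bit and sets ZF (leaving the destination undefined) for a zero input,
// so zero is mapped to -1 and the result is computed as 31 - index via
// neg/add, yielding 32 for a zero input.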
 8604 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8605   predicate(!UseCountLeadingZerosInstruction);
 8606   match(Set dst (CountLeadingZerosI src));
 8607   effect(KILL cr);
 8608 
 8609   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8610             "jnz     skip\n\t"
 8611             "movl    $dst, -1\n"
 8612       "skip:\n\t"
 8613             "negl    $dst\n\t"
 8614             "addl    $dst, 31" %}
 8615   ins_encode %{
 8616     Register Rdst = $dst$$Register;
 8617     Register Rsrc = $src$$Register;
 8618     Label skip;
 8619     __ bsrl(Rdst, Rsrc);
 8620     __ jccb(Assembler::notZero, skip);
 8621     __ movl(Rdst, -1);
 8622     __ bind(skip);
 8623     __ negl(Rdst);
 8624     __ addl(Rdst, BitsPerInt - 1);
 8625   %}
 8626   ins_pipe(ialu_reg);
 8627 %}
 8628 
 8629 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8630   predicate(UseCountLeadingZerosInstruction);
 8631   match(Set dst (CountLeadingZerosL src));
 8632   effect(KILL cr);
 8633 
 8634   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8635   ins_encode %{
 8636     __ lzcntq($dst$$Register, $src$$Register);
 8637   %}
 8638   ins_pipe(ialu_reg);
 8639 %}
 8640 
 8641 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8642   predicate(UseCountLeadingZerosInstruction);
 8643   match(Set dst (CountLeadingZerosL (LoadL src)));
 8644   effect(KILL cr);
 8645   ins_cost(175);
 8646   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8647   ins_encode %{
 8648     __ lzcntq($dst$$Register, $src$$Address);
 8649   %}
 8650   ins_pipe(ialu_reg_mem);
 8651 %}
 8652 
 8653 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8654   predicate(!UseCountLeadingZerosInstruction);
 8655   match(Set dst (CountLeadingZerosL src));
 8656   effect(KILL cr);
 8657 
 8658   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8659             "jnz     skip\n\t"
 8660             "movl    $dst, -1\n"
 8661       "skip:\n\t"
 8662             "negl    $dst\n\t"
 8663             "addl    $dst, 63" %}
 8664   ins_encode %{
 8665     Register Rdst = $dst$$Register;
 8666     Register Rsrc = $src$$Register;
 8667     Label skip;
 8668     __ bsrq(Rdst, Rsrc);
 8669     __ jccb(Assembler::notZero, skip);
 8670     __ movl(Rdst, -1);
 8671     __ bind(skip);
 8672     __ negl(Rdst);
 8673     __ addl(Rdst, BitsPerLong - 1);
 8674   %}
 8675   ins_pipe(ialu_reg);
 8676 %}
 8677 
 8678 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8679   predicate(UseCountTrailingZerosInstruction);
 8680   match(Set dst (CountTrailingZerosI src));
 8681   effect(KILL cr);
 8682 
 8683   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8684   ins_encode %{
 8685     __ tzcntl($dst$$Register, $src$$Register);
 8686   %}
 8687   ins_pipe(ialu_reg);
 8688 %}
 8689 
 8690 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8691   predicate(UseCountTrailingZerosInstruction);
 8692   match(Set dst (CountTrailingZerosI (LoadI src)));
 8693   effect(KILL cr);
 8694   ins_cost(175);
 8695   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8696   ins_encode %{
 8697     __ tzcntl($dst$$Register, $src$$Address);
 8698   %}
 8699   ins_pipe(ialu_reg_mem);
 8700 %}
 8701 
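// Fallback when tzcnt is unavailable: bsf yields the index of the lowest set
// bit and sets ZF for a zero input, in which case the branch substitutes the
// bit width (32 here, 64 in the long form).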
 8702 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8703   predicate(!UseCountTrailingZerosInstruction);
 8704   match(Set dst (CountTrailingZerosI src));
 8705   effect(KILL cr);
 8706 
 8707   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8708             "jnz     done\n\t"
 8709             "movl    $dst, 32\n"
 8710       "done:" %}
 8711   ins_encode %{
 8712     Register Rdst = $dst$$Register;
 8713     Label done;
 8714     __ bsfl(Rdst, $src$$Register);
 8715     __ jccb(Assembler::notZero, done);
 8716     __ movl(Rdst, BitsPerInt);
 8717     __ bind(done);
 8718   %}
 8719   ins_pipe(ialu_reg);
 8720 %}
 8721 
 8722 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8723   predicate(UseCountTrailingZerosInstruction);
 8724   match(Set dst (CountTrailingZerosL src));
 8725   effect(KILL cr);
 8726 
 8727   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8728   ins_encode %{
 8729     __ tzcntq($dst$$Register, $src$$Register);
 8730   %}
 8731   ins_pipe(ialu_reg);
 8732 %}
 8733 
 8734 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8735   predicate(UseCountTrailingZerosInstruction);
 8736   match(Set dst (CountTrailingZerosL (LoadL src)));
 8737   effect(KILL cr);
 8738   ins_cost(175);
 8739   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8740   ins_encode %{
 8741     __ tzcntq($dst$$Register, $src$$Address);
 8742   %}
 8743   ins_pipe(ialu_reg_mem);
 8744 %}
 8745 
 8746 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8747   predicate(!UseCountTrailingZerosInstruction);
 8748   match(Set dst (CountTrailingZerosL src));
 8749   effect(KILL cr);
 8750 
 8751   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8752             "jnz     done\n\t"
 8753             "movl    $dst, 64\n"
 8754       "done:" %}
 8755   ins_encode %{
 8756     Register Rdst = $dst$$Register;
 8757     Label done;
 8758     __ bsfq(Rdst, $src$$Register);
 8759     __ jccb(Assembler::notZero, done);
 8760     __ movl(Rdst, BitsPerLong);
 8761     __ bind(done);
 8762   %}
 8763   ins_pipe(ialu_reg);
 8764 %}
 8765 
 8766 //--------------- Reverse Operation Instructions ----------------
 8767 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8768   predicate(!VM_Version::supports_gfni());
 8769   match(Set dst (ReverseI src));
 8770   effect(TEMP dst, TEMP rtmp, KILL cr);
 8771   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8772   ins_encode %{
 8773     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8774   %}
 8775   ins_pipe( ialu_reg );
 8776 %}
 8777 
 8778 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8779   predicate(VM_Version::supports_gfni());
 8780   match(Set dst (ReverseI src));
 8781   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8782   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8783   ins_encode %{
 8784     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8785   %}
 8786   ins_pipe( ialu_reg );
 8787 %}
 8788 
 8789 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8790   predicate(!VM_Version::supports_gfni());
 8791   match(Set dst (ReverseL src));
 8792   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8793   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8794   ins_encode %{
 8795     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8796   %}
 8797   ins_pipe( ialu_reg );
 8798 %}
 8799 
 8800 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8801   predicate(VM_Version::supports_gfni());
 8802   match(Set dst (ReverseL src));
 8803   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8804   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8805   ins_encode %{
 8806     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8807   %}
 8808   ins_pipe( ialu_reg );
 8809 %}
 8810 
 8811 //---------- Population Count Instructions -------------------------------------
 8812 
 8813 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8814   predicate(UsePopCountInstruction);
 8815   match(Set dst (PopCountI src));
 8816   effect(KILL cr);
 8817 
 8818   format %{ "popcnt  $dst, $src" %}
 8819   ins_encode %{
 8820     __ popcntl($dst$$Register, $src$$Register);
 8821   %}
 8822   ins_pipe(ialu_reg);
 8823 %}
 8824 
 8825 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8826   predicate(UsePopCountInstruction);
 8827   match(Set dst (PopCountI (LoadI mem)));
 8828   effect(KILL cr);
 8829 
 8830   format %{ "popcnt  $dst, $mem" %}
 8831   ins_encode %{
 8832     __ popcntl($dst$$Register, $mem$$Address);
 8833   %}
 8834   ins_pipe(ialu_reg);
 8835 %}
 8836 
 8837 // Note: Long.bitCount(long) returns an int.
 8838 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8839   predicate(UsePopCountInstruction);
 8840   match(Set dst (PopCountL src));
 8841   effect(KILL cr);
 8842 
 8843   format %{ "popcnt  $dst, $src" %}
 8844   ins_encode %{
 8845     __ popcntq($dst$$Register, $src$$Register);
 8846   %}
 8847   ins_pipe(ialu_reg);
 8848 %}
 8849 
 8850 // Note: Long.bitCount(long) returns an int.
 8851 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8852   predicate(UsePopCountInstruction);
 8853   match(Set dst (PopCountL (LoadL mem)));
 8854   effect(KILL cr);
 8855 
 8856   format %{ "popcnt  $dst, $mem" %}
 8857   ins_encode %{
 8858     __ popcntq($dst$$Register, $mem$$Address);
 8859   %}
 8860   ins_pipe(ialu_reg);
 8861 %}
 8862 
 8863 
 8864 //----------MemBar Instructions-----------------------------------------------
 8865 // Memory barrier flavors
 8866 
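// x86-TSO already orders everything except store->load, so the acquire,
// release and storestore barriers emit no code; only the StoreLoad/volatile
// barriers emit a locked add to the top of the stack.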
 8867 instruct membar_acquire()
 8868 %{
 8869   match(MemBarAcquire);
 8870   match(LoadFence);
 8871   ins_cost(0);
 8872 
 8873   size(0);
 8874   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8875   ins_encode();
 8876   ins_pipe(empty);
 8877 %}
 8878 
 8879 instruct membar_acquire_lock()
 8880 %{
 8881   match(MemBarAcquireLock);
 8882   ins_cost(0);
 8883 
 8884   size(0);
 8885   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8886   ins_encode();
 8887   ins_pipe(empty);
 8888 %}
 8889 
 8890 instruct membar_release()
 8891 %{
 8892   match(MemBarRelease);
 8893   match(StoreFence);
 8894   ins_cost(0);
 8895 
 8896   size(0);
 8897   format %{ "MEMBAR-release ! (empty encoding)" %}
 8898   ins_encode();
 8899   ins_pipe(empty);
 8900 %}
 8901 
 8902 instruct membar_release_lock()
 8903 %{
 8904   match(MemBarReleaseLock);
 8905   ins_cost(0);
 8906 
 8907   size(0);
 8908   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8909   ins_encode();
 8910   ins_pipe(empty);
 8911 %}
 8912 
 8913 instruct membar_storeload(rFlagsReg cr) %{
 8914   match(MemBarStoreLoad);
 8915   effect(KILL cr);
 8916   ins_cost(400);
 8917 
 8918   format %{
 8919     $$template
 8920     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8921   %}
 8922   ins_encode %{
 8923     __ membar(Assembler::StoreLoad);
 8924   %}
 8925   ins_pipe(pipe_slow);
 8926 %}
 8927 
 8928 instruct membar_volatile(rFlagsReg cr) %{
 8929   match(MemBarVolatile);
 8930   effect(KILL cr);
 8931   ins_cost(400);
 8932 
 8933   format %{
 8934     $$template
 8935     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8936   %}
 8937   ins_encode %{
 8938     __ membar(Assembler::StoreLoad);
 8939   %}
 8940   ins_pipe(pipe_slow);
 8941 %}
 8942 
 8943 instruct unnecessary_membar_volatile()
 8944 %{
 8945   match(MemBarVolatile);
 8946   predicate(Matcher::post_store_load_barrier(n));
 8947   ins_cost(0);
 8948 
 8949   size(0);
 8950   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8951   ins_encode();
 8952   ins_pipe(empty);
 8953 %}
 8954 
 8955 instruct membar_full(rFlagsReg cr) %{
 8956   match(MemBarFull);
 8957   effect(KILL cr);
 8958   ins_cost(400);
 8959 
 8960   format %{
 8961     $$template
 8962     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8963   %}
 8964   ins_encode %{
 8965     __ membar(Assembler::StoreLoad);
 8966   %}
 8967   ins_pipe(pipe_slow);
 8968 %}
 8969 
 8970 instruct membar_storestore() %{
 8971   match(MemBarStoreStore);
 8972   match(StoreStoreFence);
 8973   ins_cost(0);
 8974 
 8975   size(0);
 8976   format %{ "MEMBAR-storestore (empty encoding)" %}
 8977   ins_encode();
 8978   ins_pipe(empty);
 8979 %}
 8980 
 8981 //----------Move Instructions--------------------------------------------------
 8982 
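      // The cast instructions below only reinterpret the bits of the value:
      // they emit a register-to-register move when the allocator assigned
      // different registers to src and dst, and otherwise emit nothing.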
 8983 instruct castX2P(rRegP dst, rRegL src)
 8984 %{
 8985   match(Set dst (CastX2P src));
 8986 
 8987   format %{ "movq    $dst, $src\t# long->ptr" %}
 8988   ins_encode %{
 8989     if ($dst$$reg != $src$$reg) {
 8990       __ movptr($dst$$Register, $src$$Register);
 8991     }
 8992   %}
 8993   ins_pipe(ialu_reg_reg); // XXX
 8994 %}
 8995 
 8996 instruct castI2N(rRegN dst, rRegI src)
 8997 %{
 8998   match(Set dst (CastI2N src));
 8999 
 9000   format %{ "movq    $dst, $src\t# int -> narrow ptr" %}
 9001   ins_encode %{
 9002     if ($dst$$reg != $src$$reg) {
 9003       __ movl($dst$$Register, $src$$Register);
 9004     }
 9005   %}
 9006   ins_pipe(ialu_reg_reg); // XXX
 9007 %}
 9008 
 9009 instruct castN2X(rRegL dst, rRegN src)
 9010 %{
 9011   match(Set dst (CastP2X src));
 9012 
 9013   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9014   ins_encode %{
 9015     if ($dst$$reg != $src$$reg) {
 9016       __ movptr($dst$$Register, $src$$Register);
 9017     }
 9018   %}
 9019   ins_pipe(ialu_reg_reg); // XXX
 9020 %}
 9021 
 9022 instruct castP2X(rRegL dst, rRegP src)
 9023 %{
 9024   match(Set dst (CastP2X src));
 9025 
 9026   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9027   ins_encode %{
 9028     if ($dst$$reg != $src$$reg) {
 9029       __ movptr($dst$$Register, $src$$Register);
 9030     }
 9031   %}
 9032   ins_pipe(ialu_reg_reg); // XXX
 9033 %}
 9034 
 9035 // Convert oop into int for vector alignment masking
 9036 instruct convP2I(rRegI dst, rRegP src)
 9037 %{
 9038   match(Set dst (ConvL2I (CastP2X src)));
 9039 
 9040   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9041   ins_encode %{
 9042     __ movl($dst$$Register, $src$$Register);
 9043   %}
 9044   ins_pipe(ialu_reg_reg); // XXX
 9045 %}
 9046 
 9047 // Convert compressed oop into int for vector alignment masking
 9048 // in the case of 32-bit oops (heap < 4Gb).
 9049 instruct convN2I(rRegI dst, rRegN src)
 9050 %{
 9051   predicate(CompressedOops::shift() == 0);
 9052   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 9053 
 9054   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 9055   ins_encode %{
 9056     __ movl($dst$$Register, $src$$Register);
 9057   %}
 9058   ins_pipe(ialu_reg_reg); // XXX
 9059 %}
 9060 
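      // A narrow (compressed) oop is a 32-bit offset from the heap base,
      // optionally shifted by the object alignment (roughly
      // oop = heap_base + ((uint64_t)narrow << shift) for non-null values);
      // the *_not_null variants below can omit the null check that the
      // general encode/decode forms must preserve.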
 9061 // Convert oop pointer into compressed form
 9062 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 9063   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 9064   match(Set dst (EncodeP src));
 9065   effect(KILL cr);
 9066   format %{ "encode_heap_oop $dst,$src" %}
 9067   ins_encode %{
 9068     Register s = $src$$Register;
 9069     Register d = $dst$$Register;
 9070     if (s != d) {
 9071       __ movq(d, s);
 9072     }
 9073     __ encode_heap_oop(d);
 9074   %}
 9075   ins_pipe(ialu_reg_long);
 9076 %}
 9077 
 9078 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9079   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 9080   match(Set dst (EncodeP src));
 9081   effect(KILL cr);
 9082   format %{ "encode_heap_oop_not_null $dst,$src" %}
 9083   ins_encode %{
 9084     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 9085   %}
 9086   ins_pipe(ialu_reg_long);
 9087 %}
 9088 
 9089 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 9090   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9091             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9092   match(Set dst (DecodeN src));
 9093   effect(KILL cr);
 9094   format %{ "decode_heap_oop $dst,$src" %}
 9095   ins_encode %{
 9096     Register s = $src$$Register;
 9097     Register d = $dst$$Register;
 9098     if (s != d) {
 9099       __ movq(d, s);
 9100     }
 9101     __ decode_heap_oop(d);
 9102   %}
 9103   ins_pipe(ialu_reg_long);
 9104 %}
 9105 
 9106 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9107   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9108             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9109   match(Set dst (DecodeN src));
 9110   effect(KILL cr);
 9111   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9112   ins_encode %{
 9113     Register s = $src$$Register;
 9114     Register d = $dst$$Register;
 9115     if (s != d) {
 9116       __ decode_heap_oop_not_null(d, s);
 9117     } else {
 9118       __ decode_heap_oop_not_null(d);
 9119     }
 9120   %}
 9121   ins_pipe(ialu_reg_long);
 9122 %}
 9123 
 9124 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9125   match(Set dst (EncodePKlass src));
 9126   effect(TEMP dst, KILL cr);
 9127   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9128   ins_encode %{
 9129     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9130   %}
 9131   ins_pipe(ialu_reg_long);
 9132 %}
 9133 
 9134 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9135   match(Set dst (DecodeNKlass src));
 9136   effect(TEMP dst, KILL cr);
 9137   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9138   ins_encode %{
 9139     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9140   %}
 9141   ins_pipe(ialu_reg_long);
 9142 %}
 9143 
 9144 //----------Conditional Move---------------------------------------------------
 9145 // Jump
 9146 // dummy instruction for generating temp registers
 9147 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9148   match(Jump (LShiftL switch_val shift));
 9149   ins_cost(350);
 9150   predicate(false);
 9151   effect(TEMP dest);
 9152 
 9153   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9154             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9155   ins_encode %{
 9156     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9157     // to do that and the compiler is using that register as one it can allocate.
 9158     // So we build it all by hand.
 9159     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9160     // ArrayAddress dispatch(table, index);
 9161     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9162     __ lea($dest$$Register, $constantaddress);
 9163     __ jmp(dispatch);
 9164   %}
 9165   ins_pipe(pipe_jmp);
 9166 %}
 9167 
 9168 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9169   match(Jump (AddL (LShiftL switch_val shift) offset));
 9170   ins_cost(350);
 9171   effect(TEMP dest);
 9172 
 9173   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9174             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9175   ins_encode %{
 9176     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9177     // to do that and the compiler is using that register as one it can allocate.
 9178     // So we build it all by hand.
 9179     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9180     // ArrayAddress dispatch(table, index);
 9181     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9182     __ lea($dest$$Register, $constantaddress);
 9183     __ jmp(dispatch);
 9184   %}
 9185   ins_pipe(pipe_jmp);
 9186 %}
 9187 
 9188 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9189   match(Jump switch_val);
 9190   ins_cost(350);
 9191   effect(TEMP dest);
 9192 
 9193   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9194             "jmp     [$dest + $switch_val]\n\t" %}
 9195   ins_encode %{
 9196     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9197     // to do that and the compiler is using that register as one it can allocate.
 9198     // So we build it all by hand.
 9199     // Address index(noreg, switch_reg, Address::times_1);
 9200     // ArrayAddress dispatch(table, index);
 9201     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9202     __ lea($dest$$Register, $constantaddress);
 9203     __ jmp(dispatch);
 9204   %}
 9205   ins_pipe(pipe_jmp);
 9206 %}
 9207 
 9208 // Conditional move
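      // A conditional move whose data inputs are the constants 1 and 0
      // reduces to a single setcc; the *_imm_01 forms below use that, with
      // the condition negated to match the operand order.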
 9209 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9210 %{
 9211   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9212   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9213 
 9214   ins_cost(100); // XXX
 9215   format %{ "setbn$cop $dst\t# signed, int" %}
 9216   ins_encode %{
 9217     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9218     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9219   %}
 9220   ins_pipe(ialu_reg);
 9221 %}
 9222 
 9223 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9224 %{
 9225   predicate(!UseAPX);
 9226   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9227 
 9228   ins_cost(200); // XXX
 9229   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9230   ins_encode %{
 9231     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9232   %}
 9233   ins_pipe(pipe_cmov_reg);
 9234 %}
 9235 
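      // With UseAPX the EVEX-encoded NDD ("new data destination") forms are
      // used instead: they take a separate destination operand, so an input
      // does not have to be overwritten and a copy can often be avoided.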
 9236 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9237 %{
 9238   predicate(UseAPX);
 9239   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9240 
 9241   ins_cost(200);
 9242   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9243   ins_encode %{
 9244     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9245   %}
 9246   ins_pipe(pipe_cmov_reg);
 9247 %}
 9248 
 9249 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9250 %{
 9251   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9252   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9253 
 9254   ins_cost(100); // XXX
 9255   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9256   ins_encode %{
 9257     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9258     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9259   %}
 9260   ins_pipe(ialu_reg);
 9261 %}
 9262 
 9263 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9264   predicate(!UseAPX);
 9265   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9266 
 9267   ins_cost(200); // XXX
 9268   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9269   ins_encode %{
 9270     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9271   %}
 9272   ins_pipe(pipe_cmov_reg);
 9273 %}
 9274 
 9275 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9276   predicate(UseAPX);
 9277   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9278 
 9279   ins_cost(200);
 9280   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9281   ins_encode %{
 9282     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9283   %}
 9284   ins_pipe(pipe_cmov_reg);
 9285 %}
 9286 
 9287 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9288 %{
 9289   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9290   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9291 
 9292   ins_cost(100); // XXX
 9293   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9294   ins_encode %{
 9295     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9296     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9297   %}
 9298   ins_pipe(ialu_reg);
 9299 %}
 9300 
 9301 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9302 %{
 9303   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9304   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9305 
 9306   ins_cost(100); // XXX
 9307   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9308   ins_encode %{
 9309     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9310     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9311   %}
 9312   ins_pipe(ialu_reg);
 9313 %}
 9314 
 9315 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9316   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9317 
 9318   ins_cost(200);
 9319   expand %{
 9320     cmovI_regU(cop, cr, dst, src);
 9321   %}
 9322 %}
 9323 
 9324 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9325   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9326 
 9327   ins_cost(200);
 9328   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9329   ins_encode %{
 9330     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9331   %}
 9332   ins_pipe(pipe_cmov_reg);
 9333 %}
 9334 
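      // For eq/ne tests on flags left by an unordered compare (e.g. a
      // floating-point compare), an unordered result sets the parity flag
      // and must count as "not equal"; hence the cmovp/cmovne pair below.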
 9335 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9336   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9337   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9338 
 9339   ins_cost(200); // XXX
 9340   format %{ "cmovpl  $dst, $src\n\t"
 9341             "cmovnel $dst, $src" %}
 9342   ins_encode %{
 9343     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9344     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9345   %}
 9346   ins_pipe(pipe_cmov_reg);
 9347 %}
 9348 
 9349 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9350 // inputs of the CMove
 9351 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9352   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9353   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9354   effect(TEMP dst);
 9355 
 9356   ins_cost(200); // XXX
 9357   format %{ "cmovpl  $dst, $src\n\t"
 9358             "cmovnel $dst, $src" %}
 9359   ins_encode %{
 9360     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9361     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9362   %}
 9363   ins_pipe(pipe_cmov_reg);
 9364 %}
 9365 
 9366 // Conditional move
 9367 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9368   predicate(!UseAPX);
 9369   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9370 
 9371   ins_cost(250); // XXX
 9372   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9373   ins_encode %{
 9374     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9375   %}
 9376   ins_pipe(pipe_cmov_mem);
 9377 %}
 9378 
 9379 // Conditional move
 9380 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9381 %{
 9382   predicate(UseAPX);
 9383   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9384 
 9385   ins_cost(250);
 9386   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9387   ins_encode %{
 9388     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9389   %}
 9390   ins_pipe(pipe_cmov_mem);
 9391 %}
 9392 
 9393 // Conditional move
 9394 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9395 %{
 9396   predicate(!UseAPX);
 9397   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9398 
 9399   ins_cost(250); // XXX
 9400   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9401   ins_encode %{
 9402     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9403   %}
 9404   ins_pipe(pipe_cmov_mem);
 9405 %}
 9406 
 9407 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9408   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9409 
 9410   ins_cost(250);
 9411   expand %{
 9412     cmovI_memU(cop, cr, dst, src);
 9413   %}
 9414 %}
 9415 
 9416 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9417 %{
 9418   predicate(UseAPX);
 9419   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9420 
 9421   ins_cost(250);
 9422   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9423   ins_encode %{
 9424     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9425   %}
 9426   ins_pipe(pipe_cmov_mem);
 9427 %}
 9428 
 9429 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9430 %{
 9431   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9432 
 9433   ins_cost(250);
 9434   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9435   ins_encode %{
 9436     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9437   %}
 9438   ins_pipe(pipe_cmov_mem);
 9439 %}
 9440 
 9441 // Conditional move
 9442 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9443 %{
 9444   predicate(!UseAPX);
 9445   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9446 
 9447   ins_cost(200); // XXX
 9448   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9449   ins_encode %{
 9450     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9451   %}
 9452   ins_pipe(pipe_cmov_reg);
 9453 %}
 9454 
 9455 // Conditional move ndd
 9456 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9457 %{
 9458   predicate(UseAPX);
 9459   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9460 
 9461   ins_cost(200);
 9462   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9463   ins_encode %{
 9464     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9465   %}
 9466   ins_pipe(pipe_cmov_reg);
 9467 %}
 9468 
 9469 // Conditional move
 9470 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9471 %{
 9472   predicate(!UseAPX);
 9473   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9474 
 9475   ins_cost(200); // XXX
 9476   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9477   ins_encode %{
 9478     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9479   %}
 9480   ins_pipe(pipe_cmov_reg);
 9481 %}
 9482 
 9483 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9484   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9485 
 9486   ins_cost(200);
 9487   expand %{
 9488     cmovN_regU(cop, cr, dst, src);
 9489   %}
 9490 %}
 9491 
 9492 // Conditional move ndd
 9493 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9494 %{
 9495   predicate(UseAPX);
 9496   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9497 
 9498   ins_cost(200);
 9499   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9500   ins_encode %{
 9501     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9502   %}
 9503   ins_pipe(pipe_cmov_reg);
 9504 %}
 9505 
 9506 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9507   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9508 
 9509   ins_cost(200);
 9510   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9511   ins_encode %{
 9512     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9513   %}
 9514   ins_pipe(pipe_cmov_reg);
 9515 %}
 9516 
 9517 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9518   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9519   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9520 
 9521   ins_cost(200); // XXX
 9522   format %{ "cmovpl  $dst, $src\n\t"
 9523             "cmovnel $dst, $src" %}
 9524   ins_encode %{
 9525     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9526     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9527   %}
 9528   ins_pipe(pipe_cmov_reg);
 9529 %}
 9530 
 9531 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9532 // inputs of the CMove
 9533 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9534   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9535   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9536 
 9537   ins_cost(200); // XXX
 9538   format %{ "cmovpl  $dst, $src\n\t"
 9539             "cmovnel $dst, $src" %}
 9540   ins_encode %{
 9541     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9542     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9543   %}
 9544   ins_pipe(pipe_cmov_reg);
 9545 %}
 9546 
 9547 // Conditional move
 9548 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9549 %{
 9550   predicate(!UseAPX);
 9551   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9552 
 9553   ins_cost(200); // XXX
 9554   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9555   ins_encode %{
 9556     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9557   %}
 9558   ins_pipe(pipe_cmov_reg);  // XXX
 9559 %}
 9560 
 9561 // Conditional move ndd
 9562 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9563 %{
 9564   predicate(UseAPX);
 9565   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9566 
 9567   ins_cost(200);
 9568   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9569   ins_encode %{
 9570     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9571   %}
 9572   ins_pipe(pipe_cmov_reg);
 9573 %}
 9574 
 9575 // Conditional move
 9576 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9577 %{
 9578   predicate(!UseAPX);
 9579   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9580 
 9581   ins_cost(200); // XXX
 9582   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9583   ins_encode %{
 9584     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9585   %}
 9586   ins_pipe(pipe_cmov_reg); // XXX
 9587 %}
 9588 
 9589 // Conditional move ndd
 9590 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9591 %{
 9592   predicate(UseAPX);
 9593   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9594 
 9595   ins_cost(200);
 9596   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9597   ins_encode %{
 9598     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9599   %}
 9600   ins_pipe(pipe_cmov_reg);
 9601 %}
 9602 
 9603 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9604   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9605 
 9606   ins_cost(200);
 9607   expand %{
 9608     cmovP_regU(cop, cr, dst, src);
 9609   %}
 9610 %}
 9611 
 9612 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9613   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9614 
 9615   ins_cost(200);
 9616   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9617   ins_encode %{
 9618     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9619   %}
 9620   ins_pipe(pipe_cmov_reg);
 9621 %}
 9622 
 9623 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9624   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9625   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9626 
 9627   ins_cost(200); // XXX
 9628   format %{ "cmovpq  $dst, $src\n\t"
 9629             "cmovneq $dst, $src" %}
 9630   ins_encode %{
 9631     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9632     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9633   %}
 9634   ins_pipe(pipe_cmov_reg);
 9635 %}
 9636 
 9637 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9638 // inputs of the CMove
 9639 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9640   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9641   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9642 
 9643   ins_cost(200); // XXX
 9644   format %{ "cmovpq  $dst, $src\n\t"
 9645             "cmovneq $dst, $src" %}
 9646   ins_encode %{
 9647     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9648     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9649   %}
 9650   ins_pipe(pipe_cmov_reg);
 9651 %}
 9652 
 9653 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9654 %{
 9655   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9656   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9657 
 9658   ins_cost(100); // XXX
 9659   format %{ "setbn$cop $dst\t# signed, long" %}
 9660   ins_encode %{
 9661     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9662     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9663   %}
 9664   ins_pipe(ialu_reg);
 9665 %}
 9666 
 9667 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9668 %{
 9669   predicate(!UseAPX);
 9670   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9671 
 9672   ins_cost(200); // XXX
 9673   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9674   ins_encode %{
 9675     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9676   %}
 9677   ins_pipe(pipe_cmov_reg);  // XXX
 9678 %}
 9679 
 9680 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9681 %{
 9682   predicate(UseAPX);
 9683   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9684 
 9685   ins_cost(200);
 9686   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9687   ins_encode %{
 9688     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9689   %}
 9690   ins_pipe(pipe_cmov_reg);
 9691 %}
 9692 
 9693 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9694 %{
 9695   predicate(!UseAPX);
 9696   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9697 
 9698   ins_cost(200); // XXX
 9699   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9700   ins_encode %{
 9701     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9702   %}
 9703   ins_pipe(pipe_cmov_mem);  // XXX
 9704 %}
 9705 
 9706 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9707 %{
 9708   predicate(UseAPX);
 9709   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9710 
 9711   ins_cost(200);
 9712   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9713   ins_encode %{
 9714     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9715   %}
 9716   ins_pipe(pipe_cmov_mem);
 9717 %}
 9718 
 9719 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9720 %{
 9721   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9722   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9723 
 9724   ins_cost(100); // XXX
 9725   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9726   ins_encode %{
 9727     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9728     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9729   %}
 9730   ins_pipe(ialu_reg);
 9731 %}
 9732 
 9733 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9734 %{
 9735   predicate(!UseAPX);
 9736   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9737 
 9738   ins_cost(200); // XXX
 9739   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9740   ins_encode %{
 9741     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9742   %}
 9743   ins_pipe(pipe_cmov_reg); // XXX
 9744 %}
 9745 
 9746 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9747 %{
 9748   predicate(UseAPX);
 9749   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9750 
 9751   ins_cost(200);
 9752   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9753   ins_encode %{
 9754     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9755   %}
 9756   ins_pipe(pipe_cmov_reg);
 9757 %}
 9758 
 9759 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9760 %{
 9761   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9762   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9763 
 9764   ins_cost(100); // XXX
 9765   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9766   ins_encode %{
 9767     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9768     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9769   %}
 9770   ins_pipe(ialu_reg);
 9771 %}
 9772 
 9773 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9774 %{
 9775   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9776   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9777 
 9778   ins_cost(100); // XXX
 9779   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9780   ins_encode %{
 9781     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9782     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9783   %}
 9784   ins_pipe(ialu_reg);
 9785 %}
 9786 
 9787 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9788   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9789 
 9790   ins_cost(200);
 9791   expand %{
 9792     cmovL_regU(cop, cr, dst, src);
 9793   %}
 9794 %}
 9795 
 9796 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9797 %{
 9798   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9799 
 9800   ins_cost(200);
 9801   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9802   ins_encode %{
 9803     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9804   %}
 9805   ins_pipe(pipe_cmov_reg);
 9806 %}
 9807 
 9808 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9809   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9810   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9811 
 9812   ins_cost(200); // XXX
 9813   format %{ "cmovpq  $dst, $src\n\t"
 9814             "cmovneq $dst, $src" %}
 9815   ins_encode %{
 9816     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9817     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9818   %}
 9819   ins_pipe(pipe_cmov_reg);
 9820 %}
 9821 
 9822 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9823 // inputs of the CMove
 9824 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9825   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9826   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9827 
 9828   ins_cost(200); // XXX
 9829   format %{ "cmovpq  $dst, $src\n\t"
 9830             "cmovneq $dst, $src" %}
 9831   ins_encode %{
 9832     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9833     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9834   %}
 9835   ins_pipe(pipe_cmov_reg);
 9836 %}
 9837 
 9838 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9839 %{
 9840   predicate(!UseAPX);
 9841   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9842 
 9843   ins_cost(200); // XXX
 9844   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9845   ins_encode %{
 9846     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9847   %}
 9848   ins_pipe(pipe_cmov_mem); // XXX
 9849 %}
 9850 
 9851 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9852   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9853 
 9854   ins_cost(200);
 9855   expand %{
 9856     cmovL_memU(cop, cr, dst, src);
 9857   %}
 9858 %}
 9859 
 9860 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9861 %{
 9862   predicate(UseAPX);
 9863   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9864 
 9865   ins_cost(200);
 9866   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9867   ins_encode %{
 9868     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9869   %}
 9870   ins_pipe(pipe_cmov_mem);
 9871 %}
 9872 
 9873 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9874 %{
 9875   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9876 
 9877   ins_cost(200);
 9878   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9879   ins_encode %{
 9880     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9881   %}
 9882   ins_pipe(pipe_cmov_mem);
 9883 %}
 9884 
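      // There is no cmov for XMM registers, so float/double conditional
      // moves are implemented as a short branch around an unconditional
      // register move.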
 9885 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9886 %{
 9887   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9888 
 9889   ins_cost(200); // XXX
 9890   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9891             "movss     $dst, $src\n"
 9892     "skip:" %}
 9893   ins_encode %{
 9894     Label Lskip;
 9895     // Invert sense of branch from sense of CMOV
 9896     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9897     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9898     __ bind(Lskip);
 9899   %}
 9900   ins_pipe(pipe_slow);
 9901 %}
 9902 
 9903 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9904 %{
 9905   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9906 
 9907   ins_cost(200); // XXX
 9908   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9909             "movss     $dst, $src\n"
 9910     "skip:" %}
 9911   ins_encode %{
 9912     Label Lskip;
 9913     // Invert sense of branch from sense of CMOV
 9914     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9915     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9916     __ bind(Lskip);
 9917   %}
 9918   ins_pipe(pipe_slow);
 9919 %}
 9920 
 9921 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9922   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9923 
 9924   ins_cost(200);
 9925   expand %{
 9926     cmovF_regU(cop, cr, dst, src);
 9927   %}
 9928 %}
 9929 
 9930 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9931 %{
 9932   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9933 
 9934   ins_cost(200); // XXX
 9935   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9936             "movss     $dst, $src\n"
 9937     "skip:" %}
 9938   ins_encode %{
 9939     Label Lskip;
 9940     // Invert sense of branch from sense of CMOV
 9941     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9942     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9943     __ bind(Lskip);
 9944   %}
 9945   ins_pipe(pipe_slow);
 9946 %}
 9947 
 9948 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9949 %{
 9950   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9951 
 9952   ins_cost(200); // XXX
 9953   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9954             "movsd     $dst, $src\n"
 9955     "skip:" %}
 9956   ins_encode %{
 9957     Label Lskip;
 9958     // Invert sense of branch from sense of CMOV
 9959     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9960     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9961     __ bind(Lskip);
 9962   %}
 9963   ins_pipe(pipe_slow);
 9964 %}
 9965 
 9966 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9967 %{
 9968   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9969 
 9970   ins_cost(200); // XXX
 9971   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9972             "movsd     $dst, $src\n"
 9973     "skip:" %}
 9974   ins_encode %{
 9975     Label Lskip;
 9976     // Invert sense of branch from sense of CMOV
 9977     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9978     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9979     __ bind(Lskip);
 9980   %}
 9981   ins_pipe(pipe_slow);
 9982 %}
 9983 
 9984 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9985   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9986 
 9987   ins_cost(200);
 9988   expand %{
 9989     cmovD_regU(cop, cr, dst, src);
 9990   %}
 9991 %}
 9992 
 9993 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9994 %{
 9995   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9996 
 9997   ins_cost(200); // XXX
 9998   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9999             "movsd     $dst, $src\n"
10000     "skip:" %}
10001   ins_encode %{
10002     Label Lskip;
10003     // Invert sense of branch from sense of CMOV
10004     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
10005     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
10006     __ bind(Lskip);
10007   %}
10008   ins_pipe(pipe_slow);
10009 %}
10010 
10011 //----------Arithmetic Instructions--------------------------------------------
10012 //----------Addition Instructions----------------------------------------------
10013 
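      // The flag(PD::Flag_sets_*) attributes record which EFLAGS bits an
      // instruction defines, and the Flag_ndd_demotable_* attributes mark
      // the NDD operands that permit demotion to the shorter two-operand
      // encoding when the destination coincides with that input.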
10014 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10015 %{
10016   predicate(!UseAPX);
10017   match(Set dst (AddI dst src));
10018   effect(KILL cr);
10019   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10020   format %{ "addl    $dst, $src\t# int" %}
10021   ins_encode %{
10022     __ addl($dst$$Register, $src$$Register);
10023   %}
10024   ins_pipe(ialu_reg_reg);
10025 %}
10026 
10027 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10028 %{
10029   predicate(UseAPX);
10030   match(Set dst (AddI src1 src2));
10031   effect(KILL cr);
10032   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10033 
10034   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10035   ins_encode %{
10036     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
10037   %}
10038   ins_pipe(ialu_reg_reg);
10039 %}
10040 
10041 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10042 %{
10043   predicate(!UseAPX);
10044   match(Set dst (AddI dst src));
10045   effect(KILL cr);
10046   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10047 
10048   format %{ "addl    $dst, $src\t# int" %}
10049   ins_encode %{
10050     __ addl($dst$$Register, $src$$constant);
10051   %}
10052   ins_pipe( ialu_reg );
10053 %}
10054 
10055 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10056 %{
10057   predicate(UseAPX);
10058   match(Set dst (AddI src1 src2));
10059   effect(KILL cr);
10060   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10061 
10062   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10063   ins_encode %{
10064     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
10065   %}
10066   ins_pipe( ialu_reg );
10067 %}
10068 
10069 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
10070 %{
10071   predicate(UseAPX);
10072   match(Set dst (AddI (LoadI src1) src2));
10073   effect(KILL cr);
10074   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10075 
10076   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10077   ins_encode %{
10078     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10079   %}
10080   ins_pipe( ialu_reg );
10081 %}
10082 
10083 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10084 %{
10085   predicate(!UseAPX);
10086   match(Set dst (AddI dst (LoadI src)));
10087   effect(KILL cr);
10088   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10089 
10090   ins_cost(150); // XXX
10091   format %{ "addl    $dst, $src\t# int" %}
10092   ins_encode %{
10093     __ addl($dst$$Register, $src$$Address);
10094   %}
10095   ins_pipe(ialu_reg_mem);
10096 %}
10097 
10098 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10099 %{
10100   predicate(UseAPX);
10101   match(Set dst (AddI src1 (LoadI src2)));
10102   effect(KILL cr);
10103   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10104 
10105   ins_cost(150);
10106   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10107   ins_encode %{
10108     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10109   %}
10110   ins_pipe(ialu_reg_mem);
10111 %}
10112 
10113 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10114 %{
10115   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10116   effect(KILL cr);
10117   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10118 
10119   ins_cost(150); // XXX
10120   format %{ "addl    $dst, $src\t# int" %}
10121   ins_encode %{
10122     __ addl($dst$$Address, $src$$Register);
10123   %}
10124   ins_pipe(ialu_mem_reg);
10125 %}
10126 
10127 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10128 %{
10129   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10130   effect(KILL cr);
10131   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10132 
10134   ins_cost(125); // XXX
10135   format %{ "addl    $dst, $src\t# int" %}
10136   ins_encode %{
10137     __ addl($dst$$Address, $src$$constant);
10138   %}
10139   ins_pipe(ialu_mem_imm);
10140 %}
10141 
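      // inc/dec leave the carry flag untouched, so they are only selected
      // under UseIncDec, which is turned off on processors where the
      // resulting partial-flag update is costly.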
10142 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10143 %{
10144   predicate(!UseAPX && UseIncDec);
10145   match(Set dst (AddI dst src));
10146   effect(KILL cr);
10147 
10148   format %{ "incl    $dst\t# int" %}
10149   ins_encode %{
10150     __ incrementl($dst$$Register);
10151   %}
10152   ins_pipe(ialu_reg);
10153 %}
10154 
10155 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10156 %{
10157   predicate(UseAPX && UseIncDec);
10158   match(Set dst (AddI src val));
10159   effect(KILL cr);
10160   flag(PD::Flag_ndd_demotable_opr1);
10161 
10162   format %{ "eincl    $dst, $src\t# int ndd" %}
10163   ins_encode %{
10164     __ eincl($dst$$Register, $src$$Register, false);
10165   %}
10166   ins_pipe(ialu_reg);
10167 %}
10168 
10169 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10170 %{
10171   predicate(UseAPX && UseIncDec);
10172   match(Set dst (AddI (LoadI src) val));
10173   effect(KILL cr);
10174 
10175   format %{ "eincl    $dst, $src\t# int ndd" %}
10176   ins_encode %{
10177     __ eincl($dst$$Register, $src$$Address, false);
10178   %}
10179   ins_pipe(ialu_reg);
10180 %}
10181 
10182 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10183 %{
10184   predicate(UseIncDec);
10185   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10186   effect(KILL cr);
10187 
10188   ins_cost(125); // XXX
10189   format %{ "incl    $dst\t# int" %}
10190   ins_encode %{
10191     __ incrementl($dst$$Address);
10192   %}
10193   ins_pipe(ialu_mem_imm);
10194 %}
10195 
10196 // XXX why does that use AddI
10197 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10198 %{
10199   predicate(!UseAPX && UseIncDec);
10200   match(Set dst (AddI dst src));
10201   effect(KILL cr);
10202 
10203   format %{ "decl    $dst\t# int" %}
10204   ins_encode %{
10205     __ decrementl($dst$$Register);
10206   %}
10207   ins_pipe(ialu_reg);
10208 %}
10209 
10210 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10211 %{
10212   predicate(UseAPX && UseIncDec);
10213   match(Set dst (AddI src val));
10214   effect(KILL cr);
10215   flag(PD::Flag_ndd_demotable_opr1);
10216 
10217   format %{ "edecl    $dst, $src\t# int ndd" %}
10218   ins_encode %{
10219     __ edecl($dst$$Register, $src$$Register, false);
10220   %}
10221   ins_pipe(ialu_reg);
10222 %}
10223 
10224 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10225 %{
10226   predicate(UseAPX && UseIncDec);
10227   match(Set dst (AddI (LoadI src) val));
10228   effect(KILL cr);
10229 
10230   format %{ "edecl    $dst, $src\t# int ndd" %}
10231   ins_encode %{
10232     __ edecl($dst$$Register, $src$$Address, false);
10233   %}
10234   ins_pipe(ialu_reg);
10235 %}
10236 
10237 // XXX why does that use AddI
10238 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10239 %{
10240   predicate(UseIncDec);
10241   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10242   effect(KILL cr);
10243 
10244   ins_cost(125); // XXX
10245   format %{ "decl    $dst\t# int" %}
10246   ins_encode %{
10247     __ decrementl($dst$$Address);
10248   %}
10249   ins_pipe(ialu_mem_imm);
10250 %}
10251 
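      // lea folds the shift/add address arithmetic into one instruction and
      // does not touch the flags; the forms below are guarded by the
      // fast-lea CPU checks because multi-operand lea is slow on some
      // microarchitectures.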
10252 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10253 %{
10254   predicate(VM_Version::supports_fast_2op_lea());
10255   match(Set dst (AddI (LShiftI index scale) disp));
10256 
10257   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10258   ins_encode %{
10259     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10260     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10261   %}
10262   ins_pipe(ialu_reg_reg);
10263 %}
10264 
10265 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10266 %{
10267   predicate(VM_Version::supports_fast_3op_lea());
10268   match(Set dst (AddI (AddI base index) disp));
10269 
10270   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10271   ins_encode %{
10272     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10273   %}
10274   ins_pipe(ialu_reg_reg);
10275 %}
10276 
10277 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10278 %{
10279   predicate(VM_Version::supports_fast_2op_lea());
10280   match(Set dst (AddI base (LShiftI index scale)));
10281 
10282   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10283   ins_encode %{
10284     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10285     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10286   %}
10287   ins_pipe(ialu_reg_reg);
10288 %}
10289 
10290 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10291 %{
10292   predicate(VM_Version::supports_fast_3op_lea());
10293   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10294 
10295   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10296   ins_encode %{
10297     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10298     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10299   %}
10300   ins_pipe(ialu_reg_reg);
10301 %}
10302 
10303 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10304 %{
10305   predicate(!UseAPX);
10306   match(Set dst (AddL dst src));
10307   effect(KILL cr);
10308   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10309 
10310   format %{ "addq    $dst, $src\t# long" %}
10311   ins_encode %{
10312     __ addq($dst$$Register, $src$$Register);
10313   %}
10314   ins_pipe(ialu_reg_reg);
10315 %}
10316 
10317 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10318 %{
10319   predicate(UseAPX);
10320   match(Set dst (AddL src1 src2));
10321   effect(KILL cr);
10322   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10323 
10324   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10325   ins_encode %{
10326     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10327   %}
10328   ins_pipe(ialu_reg_reg);
10329 %}
10330 
10331 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10332 %{
10333   predicate(!UseAPX);
10334   match(Set dst (AddL dst src));
10335   effect(KILL cr);
10336   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10337 
10338   format %{ "addq    $dst, $src\t# long" %}
10339   ins_encode %{
10340     __ addq($dst$$Register, $src$$constant);
10341   %}
10342   ins_pipe( ialu_reg );
10343 %}
10344 
10345 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10346 %{
10347   predicate(UseAPX);
10348   match(Set dst (AddL src1 src2));
10349   effect(KILL cr);
10350   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10351 
10352   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10353   ins_encode %{
10354     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10355   %}
10356   ins_pipe( ialu_reg );
10357 %}
10358 
10359 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10360 %{
10361   predicate(UseAPX);
10362   match(Set dst (AddL (LoadL src1) src2));
10363   effect(KILL cr);
10364   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10365 
10366   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10367   ins_encode %{
10368     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10369   %}
10370   ins_pipe( ialu_reg );
10371 %}
10372 
10373 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10374 %{
10375   predicate(!UseAPX);
10376   match(Set dst (AddL dst (LoadL src)));
10377   effect(KILL cr);
10378   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10379 
10380   ins_cost(150); // XXX
10381   format %{ "addq    $dst, $src\t# long" %}
10382   ins_encode %{
10383     __ addq($dst$$Register, $src$$Address);
10384   %}
10385   ins_pipe(ialu_reg_mem);
10386 %}
10387 
10388 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10389 %{
10390   predicate(UseAPX);
10391   match(Set dst (AddL src1 (LoadL src2)));
10392   effect(KILL cr);
10393   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10394 
10395   ins_cost(150);
10396   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10397   ins_encode %{
10398     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10399   %}
10400   ins_pipe(ialu_reg_mem);
10401 %}
10402 
10403 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10404 %{
10405   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10406   effect(KILL cr);
10407   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10408 
10409   ins_cost(150); // XXX
10410   format %{ "addq    $dst, $src\t# long" %}
10411   ins_encode %{
10412     __ addq($dst$$Address, $src$$Register);
10413   %}
10414   ins_pipe(ialu_mem_reg);
10415 %}
10416 
10417 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10418 %{
10419   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10420   effect(KILL cr);
10421   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10422 
10423   ins_cost(125); // XXX
10424   format %{ "addq    $dst, $src\t# long" %}
10425   ins_encode %{
10426     __ addq($dst$$Address, $src$$constant);
10427   %}
10428   ins_pipe(ialu_mem_imm);
10429 %}
10430 
10431 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10432 %{
10433   predicate(!UseAPX && UseIncDec);
10434   match(Set dst (AddL dst src));
10435   effect(KILL cr);
10436 
10437   format %{ "incq    $dst\t# long" %}
10438   ins_encode %{
10439     __ incrementq($dst$$Register);
10440   %}
10441   ins_pipe(ialu_reg);
10442 %}
10443 
10444 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10445 %{
10446   predicate(UseAPX && UseIncDec);
10447   match(Set dst (AddL src val));
10448   effect(KILL cr);
10449   flag(PD::Flag_ndd_demotable_opr1);
10450 
10451   format %{ "eincq    $dst, $src\t# long ndd" %}
10452   ins_encode %{
10453     __ eincq($dst$$Register, $src$$Register, false);
10454   %}
10455   ins_pipe(ialu_reg);
10456 %}
10457 
10458 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10459 %{
10460   predicate(UseAPX && UseIncDec);
10461   match(Set dst (AddL (LoadL src) val));
10462   effect(KILL cr);
10463 
10464   format %{ "eincq    $dst, $src\t# long ndd" %}
10465   ins_encode %{
10466     __ eincq($dst$$Register, $src$$Address, false);
10467   %}
10468   ins_pipe(ialu_reg);
10469 %}
10470 
10471 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10472 %{
10473   predicate(UseIncDec);
10474   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10475   effect(KILL cr);
10476 
10477   ins_cost(125); // XXX
10478   format %{ "incq    $dst\t# long" %}
10479   ins_encode %{
10480     __ incrementq($dst$$Address);
10481   %}
10482   ins_pipe(ialu_mem_imm);
10483 %}
10484 
10485 // XXX why does that use AddL
10486 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10487 %{
10488   predicate(!UseAPX && UseIncDec);
10489   match(Set dst (AddL dst src));
10490   effect(KILL cr);
10491 
10492   format %{ "decq    $dst\t# long" %}
10493   ins_encode %{
10494     __ decrementq($dst$$Register);
10495   %}
10496   ins_pipe(ialu_reg);
10497 %}
10498 
10499 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10500 %{
10501   predicate(UseAPX && UseIncDec);
10502   match(Set dst (AddL src val));
10503   effect(KILL cr);
10504   flag(PD::Flag_ndd_demotable_opr1);
10505 
10506   format %{ "edecq    $dst, $src\t# long ndd" %}
10507   ins_encode %{
10508     __ edecq($dst$$Register, $src$$Register, false);
10509   %}
10510   ins_pipe(ialu_reg);
10511 %}
10512 
10513 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10514 %{
10515   predicate(UseAPX && UseIncDec);
10516   match(Set dst (AddL (LoadL src) val));
10517   effect(KILL cr);
10518 
10519   format %{ "edecq    $dst, $src\t# long ndd" %}
10520   ins_encode %{
10521     __ edecq($dst$$Register, $src$$Address, false);
10522   %}
10523   ins_pipe(ialu_reg);
10524 %}
10525 
10526 // XXX why does that use AddL
10527 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10528 %{
10529   predicate(UseIncDec);
10530   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10531   effect(KILL cr);
10532 
10533   ins_cost(125); // XXX
10534   format %{ "decq    $dst\t# long" %}
10535   ins_encode %{
10536     __ decrementq($dst$$Address);
10537   %}
10538   ins_pipe(ialu_mem_imm);
10539 %}
10540 
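// The leaq rules below fold shift/add address arithmetic into a single lea,
// guarded by supports_fast_2op_lea()/supports_fast_3op_lea() so they are only
// selected on CPUs where a two- or three-component LEA is cheap.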
10541 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10542 %{
10543   predicate(VM_Version::supports_fast_2op_lea());
10544   match(Set dst (AddL (LShiftL index scale) disp));
10545 
10546   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10547   ins_encode %{
10548     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10549     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10550   %}
10551   ins_pipe(ialu_reg_reg);
10552 %}
10553 
10554 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10555 %{
10556   predicate(VM_Version::supports_fast_3op_lea());
10557   match(Set dst (AddL (AddL base index) disp));
10558 
10559   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10560   ins_encode %{
10561     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10562   %}
10563   ins_pipe(ialu_reg_reg);
10564 %}
10565 
10566 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10567 %{
10568   predicate(VM_Version::supports_fast_2op_lea());
10569   match(Set dst (AddL base (LShiftL index scale)));
10570 
10571   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10572   ins_encode %{
10573     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10574     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10575   %}
10576   ins_pipe(ialu_reg_reg);
10577 %}
10578 
10579 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10580 %{
10581   predicate(VM_Version::supports_fast_3op_lea());
10582   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10583 
10584   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10585   ins_encode %{
10586     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10587     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10588   %}
10589   ins_pipe(ialu_reg_reg);
10590 %}
10591 
10592 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10593 %{
10594   match(Set dst (AddP dst src));
10595   effect(KILL cr);
10596   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10597 
10598   format %{ "addq    $dst, $src\t# ptr" %}
10599   ins_encode %{
10600     __ addq($dst$$Register, $src$$Register);
10601   %}
10602   ins_pipe(ialu_reg_reg);
10603 %}
10604 
10605 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10606 %{
10607   match(Set dst (AddP dst src));
10608   effect(KILL cr);
10609   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10610 
10611   format %{ "addq    $dst, $src\t# ptr" %}
10612   ins_encode %{
10613     __ addq($dst$$Register, $src$$constant);
10614   %}
10615   ins_pipe( ialu_reg );
10616 %}
10617 
10618 // XXX addP mem ops ????
10619 
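// The unchecked CheckCastPP/Cast* rules below emit no code (size(0), empty
// encoding); they only preserve the type information of the ideal Cast nodes
// through matching.  The *_checked variants, selected under
// VerifyConstraintCasts, emit range-verification code instead.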
10620 instruct checkCastPP(rRegP dst)
10621 %{
10622   match(Set dst (CheckCastPP dst));
10623 
10624   size(0);
10625   format %{ "# checkcastPP of $dst" %}
10626   ins_encode(/* empty encoding */);
10627   ins_pipe(empty);
10628 %}
10629 
10630 instruct castPP(rRegP dst)
10631 %{
10632   match(Set dst (CastPP dst));
10633 
10634   size(0);
10635   format %{ "# castPP of $dst" %}
10636   ins_encode(/* empty encoding */);
10637   ins_pipe(empty);
10638 %}
10639 
10640 instruct castII(rRegI dst)
10641 %{
10642   predicate(VerifyConstraintCasts == 0);
10643   match(Set dst (CastII dst));
10644 
10645   size(0);
10646   format %{ "# castII of $dst" %}
10647   ins_encode(/* empty encoding */);
10648   ins_cost(0);
10649   ins_pipe(empty);
10650 %}
10651 
10652 instruct castII_checked(rRegI dst, rFlagsReg cr)
10653 %{
10654   predicate(VerifyConstraintCasts > 0);
10655   match(Set dst (CastII dst));
10656 
10657   effect(KILL cr);
10658   format %{ "# cast_checked_II $dst" %}
10659   ins_encode %{
10660     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10661   %}
10662   ins_pipe(pipe_slow);
10663 %}
10664 
10665 instruct castLL(rRegL dst)
10666 %{
10667   predicate(VerifyConstraintCasts == 0);
10668   match(Set dst (CastLL dst));
10669 
10670   size(0);
10671   format %{ "# castLL of $dst" %}
10672   ins_encode(/* empty encoding */);
10673   ins_cost(0);
10674   ins_pipe(empty);
10675 %}
10676 
10677 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10678 %{
10679   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10680   match(Set dst (CastLL dst));
10681 
10682   effect(KILL cr);
10683   format %{ "# cast_checked_LL $dst" %}
10684   ins_encode %{
10685     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10686   %}
10687   ins_pipe(pipe_slow);
10688 %}
10689 
10690 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10691 %{
10692   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10693   match(Set dst (CastLL dst));
10694 
10695   effect(KILL cr, TEMP tmp);
10696   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10697   ins_encode %{
10698     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10699   %}
10700   ins_pipe(pipe_slow);
10701 %}
10702 
10703 instruct castFF(regF dst)
10704 %{
10705   match(Set dst (CastFF dst));
10706 
10707   size(0);
10708   format %{ "# castFF of $dst" %}
10709   ins_encode(/* empty encoding */);
10710   ins_cost(0);
10711   ins_pipe(empty);
10712 %}
10713 
10714 instruct castHH(regF dst)
10715 %{
10716   match(Set dst (CastHH dst));
10717 
10718   size(0);
10719   format %{ "# castHH of $dst" %}
10720   ins_encode(/* empty encoding */);
10721   ins_cost(0);
10722   ins_pipe(empty);
10723 %}
10724 
10725 instruct castDD(regD dst)
10726 %{
10727   match(Set dst (CastDD dst));
10728 
10729   size(0);
10730   format %{ "# castDD of $dst" %}
10731   ins_encode(/* empty encoding */);
10732   ins_cost(0);
10733   ins_pipe(empty);
10734 %}
10735 
10736 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
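// The CompareAndSwap rules below all follow the same shape: lock cmpxchg
// compares rax (oldval) with the memory operand and stores newval on a match,
// then setcc materializes ZF as the 0/1 int result in $res.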
10737 instruct compareAndSwapP(rRegI res,
10738                          memory mem_ptr,
10739                          rax_RegP oldval, rRegP newval,
10740                          rFlagsReg cr)
10741 %{
10742   predicate(n->as_LoadStore()->barrier_data() == 0);
10743   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10744   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10745   effect(KILL cr, KILL oldval);
10746 
10747   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10748             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10749             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10750   ins_encode %{
10751     __ lock();
10752     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10753     __ setcc(Assembler::equal, $res$$Register);
10754   %}
10755   ins_pipe( pipe_cmpxchg );
10756 %}
10757 
10758 instruct compareAndSwapL(rRegI res,
10759                          memory mem_ptr,
10760                          rax_RegL oldval, rRegL newval,
10761                          rFlagsReg cr)
10762 %{
10763   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10764   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10765   effect(KILL cr, KILL oldval);
10766 
10767   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10768             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10769             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10770   ins_encode %{
10771     __ lock();
10772     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10773     __ setcc(Assembler::equal, $res$$Register);
10774   %}
10775   ins_pipe( pipe_cmpxchg );
10776 %}
10777 
10778 instruct compareAndSwapI(rRegI res,
10779                          memory mem_ptr,
10780                          rax_RegI oldval, rRegI newval,
10781                          rFlagsReg cr)
10782 %{
10783   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10784   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10785   effect(KILL cr, KILL oldval);
10786 
10787   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10788             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10789             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10790   ins_encode %{
10791     __ lock();
10792     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10793     __ setcc(Assembler::equal, $res$$Register);
10794   %}
10795   ins_pipe( pipe_cmpxchg );
10796 %}
10797 
10798 instruct compareAndSwapB(rRegI res,
10799                          memory mem_ptr,
10800                          rax_RegI oldval, rRegI newval,
10801                          rFlagsReg cr)
10802 %{
10803   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10804   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10805   effect(KILL cr, KILL oldval);
10806 
10807   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10808             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10809             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10810   ins_encode %{
10811     __ lock();
10812     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10813     __ setcc(Assembler::equal, $res$$Register);
10814   %}
10815   ins_pipe( pipe_cmpxchg );
10816 %}
10817 
10818 instruct compareAndSwapS(rRegI res,
10819                          memory mem_ptr,
10820                          rax_RegI oldval, rRegI newval,
10821                          rFlagsReg cr)
10822 %{
10823   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10824   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10825   effect(KILL cr, KILL oldval);
10826 
10827   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10828             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10829             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10830   ins_encode %{
10831     __ lock();
10832     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10833     __ setcc(Assembler::equal, $res$$Register);
10834   %}
10835   ins_pipe( pipe_cmpxchg );
10836 %}
10837 
10838 instruct compareAndSwapN(rRegI res,
10839                           memory mem_ptr,
10840                           rax_RegN oldval, rRegN newval,
10841                           rFlagsReg cr) %{
10842   predicate(n->as_LoadStore()->barrier_data() == 0);
10843   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10844   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10845   effect(KILL cr, KILL oldval);
10846 
10847   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10848             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10849             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10850   ins_encode %{
10851     __ lock();
10852     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10853     __ setcc(Assembler::equal, $res$$Register);
10854   %}
10855   ins_pipe( pipe_cmpxchg );
10856 %}
10857 
10858 instruct compareAndExchangeB(
10859                          memory mem_ptr,
10860                          rax_RegI oldval, rRegI newval,
10861                          rFlagsReg cr)
10862 %{
10863   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10864   effect(KILL cr);
10865 
10866   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10867             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10868   ins_encode %{
10869     __ lock();
10870     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10871   %}
10872   ins_pipe( pipe_cmpxchg );
10873 %}
10874 
10875 instruct compareAndExchangeS(
10876                          memory mem_ptr,
10877                          rax_RegI oldval, rRegI newval,
10878                          rFlagsReg cr)
10879 %{
10880   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10881   effect(KILL cr);
10882 
10883   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10884             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10885   ins_encode %{
10886     __ lock();
10887     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10888   %}
10889   ins_pipe( pipe_cmpxchg );
10890 %}
10891 
10892 instruct compareAndExchangeI(
10893                          memory mem_ptr,
10894                          rax_RegI oldval, rRegI newval,
10895                          rFlagsReg cr)
10896 %{
10897   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10898   effect(KILL cr);
10899 
10900   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10901             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10902   ins_encode %{
10903     __ lock();
10904     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10905   %}
10906   ins_pipe( pipe_cmpxchg );
10907 %}
10908 
10909 instruct compareAndExchangeL(
10910                          memory mem_ptr,
10911                          rax_RegL oldval, rRegL newval,
10912                          rFlagsReg cr)
10913 %{
10914   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10915   effect(KILL cr);
10916 
10917   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10918             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10919   ins_encode %{
10920     __ lock();
10921     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10922   %}
10923   ins_pipe( pipe_cmpxchg );
10924 %}
10925 
10926 instruct compareAndExchangeN(
10927                           memory mem_ptr,
10928                           rax_RegN oldval, rRegN newval,
10929                           rFlagsReg cr) %{
10930   predicate(n->as_LoadStore()->barrier_data() == 0);
10931   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10932   effect(KILL cr);
10933 
10934   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10935             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10936   ins_encode %{
10937     __ lock();
10938     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10939   %}
10940   ins_pipe( pipe_cmpxchg );
10941 %}
10942 
10943 instruct compareAndExchangeP(
10944                          memory mem_ptr,
10945                          rax_RegP oldval, rRegP newval,
10946                          rFlagsReg cr)
10947 %{
10948   predicate(n->as_LoadStore()->barrier_data() == 0);
10949   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10950   effect(KILL cr);
10951 
10952   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10953             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10954   ins_encode %{
10955     __ lock();
10956     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10957   %}
10958   ins_pipe( pipe_cmpxchg );
10959 %}
10960 
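// GetAndAdd: when the fetched value is unused (result_not_used()) a plain
// "lock add" suffices; otherwise "lock xadd" returns the previous value in
// the register, and sub-word results are re-normalized with
// narrow_subword_type().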
10961 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10962   predicate(n->as_LoadStore()->result_not_used());
10963   match(Set dummy (GetAndAddB mem add));
10964   effect(KILL cr);
10965   format %{ "addb_lock   $mem, $add" %}
10966   ins_encode %{
10967     __ lock();
10968     __ addb($mem$$Address, $add$$Register);
10969   %}
10970   ins_pipe(pipe_cmpxchg);
10971 %}
10972 
10973 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10974   predicate(n->as_LoadStore()->result_not_used());
10975   match(Set dummy (GetAndAddB mem add));
10976   effect(KILL cr);
10977   format %{ "addb_lock   $mem, $add" %}
10978   ins_encode %{
10979     __ lock();
10980     __ addb($mem$$Address, $add$$constant);
10981   %}
10982   ins_pipe(pipe_cmpxchg);
10983 %}
10984 
10985 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10986   predicate(!n->as_LoadStore()->result_not_used());
10987   match(Set newval (GetAndAddB mem newval));
10988   effect(KILL cr);
10989   format %{ "xaddb_lock  $mem, $newval\t# $newval -> byte" %}
10990   ins_encode %{
10991     __ lock();
10992     __ xaddb($mem$$Address, $newval$$Register);
10993     __ narrow_subword_type($newval$$Register, T_BYTE);
10994   %}
10995   ins_pipe(pipe_cmpxchg);
10996 %}
10997 
10998 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10999   predicate(n->as_LoadStore()->result_not_used());
11000   match(Set dummy (GetAndAddS mem add));
11001   effect(KILL cr);
11002   format %{ "addw_lock   $mem, $add" %}
11003   ins_encode %{
11004     __ lock();
11005     __ addw($mem$$Address, $add$$Register);
11006   %}
11007   ins_pipe(pipe_cmpxchg);
11008 %}
11009 
11010 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11011   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
11012   match(Set dummy (GetAndAddS mem add));
11013   effect(KILL cr);
11014   format %{ "addw_lock   $mem, $add" %}
11015   ins_encode %{
11016     __ lock();
11017     __ addw($mem$$Address, $add$$constant);
11018   %}
11019   ins_pipe(pipe_cmpxchg);
11020 %}
11021 
11022 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
11023   predicate(!n->as_LoadStore()->result_not_used());
11024   match(Set newval (GetAndAddS mem newval));
11025   effect(KILL cr);
11026   format %{ "xaddw_lock  $mem, $newval\t# $newval -> short" %}
11027   ins_encode %{
11028     __ lock();
11029     __ xaddw($mem$$Address, $newval$$Register);
11030     __ narrow_subword_type($newval$$Register, T_SHORT);
11031   %}
11032   ins_pipe(pipe_cmpxchg);
11033 %}
11034 
11035 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
11036   predicate(n->as_LoadStore()->result_not_used());
11037   match(Set dummy (GetAndAddI mem add));
11038   effect(KILL cr);
11039   format %{ "addl_lock   $mem, $add" %}
11040   ins_encode %{
11041     __ lock();
11042     __ addl($mem$$Address, $add$$Register);
11043   %}
11044   ins_pipe(pipe_cmpxchg);
11045 %}
11046 
11047 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11048   predicate(n->as_LoadStore()->result_not_used());
11049   match(Set dummy (GetAndAddI mem add));
11050   effect(KILL cr);
11051   format %{ "addl_lock   $mem, $add" %}
11052   ins_encode %{
11053     __ lock();
11054     __ addl($mem$$Address, $add$$constant);
11055   %}
11056   ins_pipe(pipe_cmpxchg);
11057 %}
11058 
11059 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
11060   predicate(!n->as_LoadStore()->result_not_used());
11061   match(Set newval (GetAndAddI mem newval));
11062   effect(KILL cr);
11063   format %{ "xaddl_lock  $mem, $newval" %}
11064   ins_encode %{
11065     __ lock();
11066     __ xaddl($mem$$Address, $newval$$Register);
11067   %}
11068   ins_pipe(pipe_cmpxchg);
11069 %}
11070 
11071 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11072   predicate(n->as_LoadStore()->result_not_used());
11073   match(Set dummy (GetAndAddL mem add));
11074   effect(KILL cr);
11075   format %{ "addq_lock   $mem, $add" %}
11076   ins_encode %{
11077     __ lock();
11078     __ addq($mem$$Address, $add$$Register);
11079   %}
11080   ins_pipe(pipe_cmpxchg);
11081 %}
11082 
11083 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11084   predicate(n->as_LoadStore()->result_not_used());
11085   match(Set dummy (GetAndAddL mem add));
11086   effect(KILL cr);
11087   format %{ "addq_lock   $mem, $add" %}
11088   ins_encode %{
11089     __ lock();
11090     __ addq($mem$$Address, $add$$constant);
11091   %}
11092   ins_pipe(pipe_cmpxchg);
11093 %}
11094 
11095 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11096   predicate(!n->as_LoadStore()->result_not_used());
11097   match(Set newval (GetAndAddL mem newval));
11098   effect(KILL cr);
11099   format %{ "xaddq_lock  $mem, $newval" %}
11100   ins_encode %{
11101     __ lock();
11102     __ xaddq($mem$$Address, $newval$$Register);
11103   %}
11104   ins_pipe(pipe_cmpxchg);
11105 %}
11106 
11107 instruct xchgB( memory mem, rRegI newval) %{
11108   match(Set newval (GetAndSetB mem newval));
11109   format %{ "XCHGB  $newval,[$mem]\t# $newval -> byte" %}
11110   ins_encode %{
11111     __ xchgb($newval$$Register, $mem$$Address);
11112     __ narrow_subword_type($newval$$Register, T_BYTE);
11113   %}
11114   ins_pipe( pipe_cmpxchg );
11115 %}
11116 
11117 instruct xchgS( memory mem, rRegI newval) %{
11118   match(Set newval (GetAndSetS mem newval));
11119   format %{ "XCHGW  $newval,[$mem]\t# $newval -> short" %}
11120   ins_encode %{
11121     __ xchgw($newval$$Register, $mem$$Address);
11122     __ narrow_subword_type($newval$$Register, T_SHORT);
11123   %}
11124   ins_pipe( pipe_cmpxchg );
11125 %}
11126 
11127 instruct xchgI( memory mem, rRegI newval) %{
11128   match(Set newval (GetAndSetI mem newval));
11129   format %{ "XCHGL  $newval,[$mem]" %}
11130   ins_encode %{
11131     __ xchgl($newval$$Register, $mem$$Address);
11132   %}
11133   ins_pipe( pipe_cmpxchg );
11134 %}
11135 
11136 instruct xchgL( memory mem, rRegL newval) %{
11137   match(Set newval (GetAndSetL mem newval));
11138   format %{ "XCHGQ  $newval,[$mem]" %}
11139   ins_encode %{
11140     __ xchgq($newval$$Register, $mem$$Address);
11141   %}
11142   ins_pipe( pipe_cmpxchg );
11143 %}
11144 
11145 instruct xchgP( memory mem, rRegP newval) %{
11146   match(Set newval (GetAndSetP mem newval));
11147   predicate(n->as_LoadStore()->barrier_data() == 0);
11148   format %{ "XCHGQ  $newval,[$mem]" %}
11149   ins_encode %{
11150     __ xchgq($newval$$Register, $mem$$Address);
11151   %}
11152   ins_pipe( pipe_cmpxchg );
11153 %}
11154 
11155 instruct xchgN( memory mem, rRegN newval) %{
11156   predicate(n->as_LoadStore()->barrier_data() == 0);
11157   match(Set newval (GetAndSetN mem newval));
11158   format %{ "XCHGL  $newval,[$mem]" %}
11159   ins_encode %{
11160     __ xchgl($newval$$Register, $mem$$Address);
11161   %}
11162   ins_pipe( pipe_cmpxchg );
11163 %}
11164 
11165 //----------Abs Instructions-------------------------------------------
11166 
11167 // Integer Absolute Instructions
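// Branchless absolute value: dst = 0 - src sets the flags, and the cmov
// copies src back when that result is negative, i.e. when src was positive.
// For src = -5: dst becomes 5 and the cmov is not taken.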
11168 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11169 %{
11170   match(Set dst (AbsI src));
11171   effect(TEMP dst, KILL cr);
11172   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11173             "subl    $dst, $src\n\t"
11174             "cmovll  $dst, $src" %}
11175   ins_encode %{
11176     __ xorl($dst$$Register, $dst$$Register);
11177     __ subl($dst$$Register, $src$$Register);
11178     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11179   %}
11180 
11181   ins_pipe(ialu_reg_reg);
11182 %}
11183 
11184 // Long Absolute Instructions
11185 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11186 %{
11187   match(Set dst (AbsL src));
11188   effect(TEMP dst, KILL cr);
11189   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11190             "subq    $dst, $src\n\t"
11191             "cmovlq  $dst, $src" %}
11192   ins_encode %{
11193     __ xorl($dst$$Register, $dst$$Register);
11194     __ subq($dst$$Register, $src$$Register);
11195     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11196   %}
11197 
11198   ins_pipe(ialu_reg_reg);
11199 %}
11200 
11201 //----------Subtraction Instructions-------------------------------------------
11202 
11203 // Integer Subtraction Instructions
11204 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11205 %{
11206   predicate(!UseAPX);
11207   match(Set dst (SubI dst src));
11208   effect(KILL cr);
11209   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11210 
11211   format %{ "subl    $dst, $src\t# int" %}
11212   ins_encode %{
11213     __ subl($dst$$Register, $src$$Register);
11214   %}
11215   ins_pipe(ialu_reg_reg);
11216 %}
11217 
11218 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11219 %{
11220   predicate(UseAPX);
11221   match(Set dst (SubI src1 src2));
11222   effect(KILL cr);
11223   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11224 
11225   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11226   ins_encode %{
11227     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11228   %}
11229   ins_pipe(ialu_reg_reg);
11230 %}
11231 
11232 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11233 %{
11234   predicate(UseAPX);
11235   match(Set dst (SubI src1 src2));
11236   effect(KILL cr);
11237   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11238 
11239   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11240   ins_encode %{
11241     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11242   %}
11243   ins_pipe(ialu_reg_reg);
11244 %}
11245 
11246 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11247 %{
11248   predicate(UseAPX);
11249   match(Set dst (SubI (LoadI src1) src2));
11250   effect(KILL cr);
11251   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11252 
11253   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11254   ins_encode %{
11255     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11256   %}
11257   ins_pipe(ialu_reg_reg);
11258 %}
11259 
11260 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11261 %{
11262   predicate(!UseAPX);
11263   match(Set dst (SubI dst (LoadI src)));
11264   effect(KILL cr);
11265   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11266 
11267   ins_cost(150);
11268   format %{ "subl    $dst, $src\t# int" %}
11269   ins_encode %{
11270     __ subl($dst$$Register, $src$$Address);
11271   %}
11272   ins_pipe(ialu_reg_mem);
11273 %}
11274 
11275 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11276 %{
11277   predicate(UseAPX);
11278   match(Set dst (SubI src1 (LoadI src2)));
11279   effect(KILL cr);
11280   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11281 
11282   ins_cost(150);
11283   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11284   ins_encode %{
11285     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11286   %}
11287   ins_pipe(ialu_reg_mem);
11288 %}
11289 
11290 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11291 %{
11292   predicate(UseAPX);
11293   match(Set dst (SubI (LoadI src1) src2));
11294   effect(KILL cr);
11295   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11296 
11297   ins_cost(150);
11298   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11299   ins_encode %{
11300     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11301   %}
11302   ins_pipe(ialu_reg_mem);
11303 %}
11304 
11305 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11306 %{
11307   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11308   effect(KILL cr);
11309   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11310 
11311   ins_cost(150);
11312   format %{ "subl    $dst, $src\t# int" %}
11313   ins_encode %{
11314     __ subl($dst$$Address, $src$$Register);
11315   %}
11316   ins_pipe(ialu_mem_reg);
11317 %}
11318 
11319 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11320 %{
11321   predicate(!UseAPX);
11322   match(Set dst (SubL dst src));
11323   effect(KILL cr);
11324   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11325 
11326   format %{ "subq    $dst, $src\t# long" %}
11327   ins_encode %{
11328     __ subq($dst$$Register, $src$$Register);
11329   %}
11330   ins_pipe(ialu_reg_reg);
11331 %}
11332 
11333 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11334 %{
11335   predicate(UseAPX);
11336   match(Set dst (SubL src1 src2));
11337   effect(KILL cr);
11338   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11339 
11340   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11341   ins_encode %{
11342     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11343   %}
11344   ins_pipe(ialu_reg_reg);
11345 %}
11346 
11347 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11348 %{
11349   predicate(UseAPX);
11350   match(Set dst (SubL src1 src2));
11351   effect(KILL cr);
11352   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11353 
11354   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11355   ins_encode %{
11356     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11357   %}
11358   ins_pipe(ialu_reg_reg);
11359 %}
11360 
11361 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11362 %{
11363   predicate(UseAPX);
11364   match(Set dst (SubL (LoadL src1) src2));
11365   effect(KILL cr);
11366   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11367 
11368   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11369   ins_encode %{
11370     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11371   %}
11372   ins_pipe(ialu_reg_reg);
11373 %}
11374 
11375 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11376 %{
11377   predicate(!UseAPX);
11378   match(Set dst (SubL dst (LoadL src)));
11379   effect(KILL cr);
11380   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11381 
11382   ins_cost(150);
11383   format %{ "subq    $dst, $src\t# long" %}
11384   ins_encode %{
11385     __ subq($dst$$Register, $src$$Address);
11386   %}
11387   ins_pipe(ialu_reg_mem);
11388 %}
11389 
11390 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11391 %{
11392   predicate(UseAPX);
11393   match(Set dst (SubL src1 (LoadL src2)));
11394   effect(KILL cr);
11395   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11396 
11397   ins_cost(150);
11398   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11399   ins_encode %{
11400     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11401   %}
11402   ins_pipe(ialu_reg_mem);
11403 %}
11404 
11405 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11406 %{
11407   predicate(UseAPX);
11408   match(Set dst (SubL (LoadL src1) src2));
11409   effect(KILL cr);
11410   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11411 
11412   ins_cost(150);
11413   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11414   ins_encode %{
11415     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11416   %}
11417   ins_pipe(ialu_reg_mem);
11418 %}
11419 
11420 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11421 %{
11422   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11423   effect(KILL cr);
11424   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11425 
11426   ins_cost(150);
11427   format %{ "subq    $dst, $src\t# long" %}
11428   ins_encode %{
11429     __ subq($dst$$Address, $src$$Register);
11430   %}
11431   ins_pipe(ialu_mem_reg);
11432 %}
11433 
11434 // Subtract from a pointer
11435 // XXX hmpf???
11436 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11437 %{
11438   match(Set dst (AddP dst (SubI zero src)));
11439   effect(KILL cr);
11440 
11441   format %{ "subq    $dst, $src\t# ptr - int" %}
11442   ins_encode %{
11443     __ subq($dst$$Register, $src$$Register);
11444   %}
11445   ins_pipe(ialu_reg_reg);
11446 %}
11447 
11448 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11449 %{
11450   predicate(!UseAPX);
11451   match(Set dst (SubI zero dst));
11452   effect(KILL cr);
11453   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11454 
11455   format %{ "negl    $dst\t# int" %}
11456   ins_encode %{
11457     __ negl($dst$$Register);
11458   %}
11459   ins_pipe(ialu_reg);
11460 %}
11461 
11462 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11463 %{
11464   predicate(UseAPX);
11465   match(Set dst (SubI zero src));
11466   effect(KILL cr);
11467   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11468 
11469   format %{ "enegl    $dst, $src\t# int ndd" %}
11470   ins_encode %{
11471     __ enegl($dst$$Register, $src$$Register, false);
11472   %}
11473   ins_pipe(ialu_reg);
11474 %}
11475 
11476 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11477 %{
11478   predicate(!UseAPX);
11479   match(Set dst (NegI dst));
11480   effect(KILL cr);
11481   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11482 
11483   format %{ "negl    $dst\t# int" %}
11484   ins_encode %{
11485     __ negl($dst$$Register);
11486   %}
11487   ins_pipe(ialu_reg);
11488 %}
11489 
11490 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11491 %{
11492   predicate(UseAPX);
11493   match(Set dst (NegI src));
11494   effect(KILL cr);
11495   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11496 
11497   format %{ "enegl    $dst, $src\t# int ndd" %}
11498   ins_encode %{
11499     __ enegl($dst$$Register, $src$$Register, false);
11500   %}
11501   ins_pipe(ialu_reg);
11502 %}
11503 
11504 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11505 %{
11506   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11507   effect(KILL cr);
11508   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11509 
11510   format %{ "negl    $dst\t# int" %}
11511   ins_encode %{
11512     __ negl($dst$$Address);
11513   %}
11514   ins_pipe(ialu_reg);
11515 %}
11516 
11517 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11518 %{
11519   predicate(!UseAPX);
11520   match(Set dst (SubL zero dst));
11521   effect(KILL cr);
11522   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11523 
11524   format %{ "negq    $dst\t# long" %}
11525   ins_encode %{
11526     __ negq($dst$$Register);
11527   %}
11528   ins_pipe(ialu_reg);
11529 %}
11530 
11531 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11532 %{
11533   predicate(UseAPX);
11534   match(Set dst (SubL zero src));
11535   effect(KILL cr);
11536   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11537 
11538   format %{ "enegq    $dst, $src\t# long ndd" %}
11539   ins_encode %{
11540     __ enegq($dst$$Register, $src$$Register, false);
11541   %}
11542   ins_pipe(ialu_reg);
11543 %}
11544 
11545 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11546 %{
11547   predicate(!UseAPX);
11548   match(Set dst (NegL dst));
11549   effect(KILL cr);
11550   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11551 
11552   format %{ "negq    $dst\t# long" %}
11553   ins_encode %{
11554     __ negq($dst$$Register);
11555   %}
11556   ins_pipe(ialu_reg);
11557 %}
11558 
11559 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11560 %{
11561   predicate(UseAPX);
11562   match(Set dst (NegL src));
11563   effect(KILL cr);
11564   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11565 
11566   format %{ "enegq    $dst, $src\t# long ndd" %}
11567   ins_encode %{
11568     __ enegq($dst$$Register, $src$$Register, false);
11569   %}
11570   ins_pipe(ialu_reg);
11571 %}
11572 
11573 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11574 %{
11575   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11576   effect(KILL cr);
11577   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11578 
11579   format %{ "negq    $dst\t# long" %}
11580   ins_encode %{
11581     __ negq($dst$$Address);
11582   %}
11583   ins_pipe(ialu_reg);
11584 %}
11585 
11586 //----------Multiplication/Division Instructions-------------------------------
11587 // Integer Multiplication Instructions
11588 // Multiply Register
11589 
11590 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11591 %{
11592   predicate(!UseAPX);
11593   match(Set dst (MulI dst src));
11594   effect(KILL cr);
11595 
11596   ins_cost(300);
11597   format %{ "imull   $dst, $src\t# int" %}
11598   ins_encode %{
11599     __ imull($dst$$Register, $src$$Register);
11600   %}
11601   ins_pipe(ialu_reg_reg_alu0);
11602 %}
11603 
11604 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11605 %{
11606   predicate(UseAPX);
11607   match(Set dst (MulI src1 src2));
11608   effect(KILL cr);
11609   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11610 
11611   ins_cost(300);
11612   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11613   ins_encode %{
11614     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11615   %}
11616   ins_pipe(ialu_reg_reg_alu0);
11617 %}
11618 
11619 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11620 %{
11621   match(Set dst (MulI src imm));
11622   effect(KILL cr);
11623 
11624   ins_cost(300);
11625   format %{ "imull   $dst, $src, $imm\t# int" %}
11626   ins_encode %{
11627     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11628   %}
11629   ins_pipe(ialu_reg_reg_alu0);
11630 %}
11631 
11632 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11633 %{
11634   predicate(!UseAPX);
11635   match(Set dst (MulI dst (LoadI src)));
11636   effect(KILL cr);
11637 
11638   ins_cost(350);
11639   format %{ "imull   $dst, $src\t# int" %}
11640   ins_encode %{
11641     __ imull($dst$$Register, $src$$Address);
11642   %}
11643   ins_pipe(ialu_reg_mem_alu0);
11644 %}
11645 
11646 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11647 %{
11648   predicate(UseAPX);
11649   match(Set dst (MulI src1 (LoadI src2)));
11650   effect(KILL cr);
11651   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11652 
11653   ins_cost(350);
11654   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11655   ins_encode %{
11656     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11657   %}
11658   ins_pipe(ialu_reg_mem_alu0);
11659 %}
11660 
11661 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11662 %{
11663   match(Set dst (MulI (LoadI src) imm));
11664   effect(KILL cr);
11665 
11666   ins_cost(300);
11667   format %{ "imull   $dst, $src, $imm\t# int" %}
11668   ins_encode %{
11669     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11670   %}
11671   ins_pipe(ialu_reg_mem_alu0);
11672 %}
11673 
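// MulAddS2I has no direct encoding; the expand below composes existing rules
// to compute dst = dst*src1 + src2*src3, clobbering src2 with the second
// product.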
11674 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11675 %{
11676   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11677   effect(KILL cr, KILL src2);
11678 
11679   expand %{ mulI_rReg(dst, src1, cr);
11680            mulI_rReg(src2, src3, cr);
11681            addI_rReg(dst, src2, cr); %}
11682 %}
11683 
11684 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11685 %{
11686   predicate(!UseAPX);
11687   match(Set dst (MulL dst src));
11688   effect(KILL cr);
11689 
11690   ins_cost(300);
11691   format %{ "imulq   $dst, $src\t# long" %}
11692   ins_encode %{
11693     __ imulq($dst$$Register, $src$$Register);
11694   %}
11695   ins_pipe(ialu_reg_reg_alu0);
11696 %}
11697 
11698 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11699 %{
11700   predicate(UseAPX);
11701   match(Set dst (MulL src1 src2));
11702   effect(KILL cr);
11703   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11704 
11705   ins_cost(300);
11706   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11707   ins_encode %{
11708     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11709   %}
11710   ins_pipe(ialu_reg_reg_alu0);
11711 %}
11712 
11713 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11714 %{
11715   match(Set dst (MulL src imm));
11716   effect(KILL cr);
11717 
11718   ins_cost(300);
11719   format %{ "imulq   $dst, $src, $imm\t# long" %}
11720   ins_encode %{
11721     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11722   %}
11723   ins_pipe(ialu_reg_reg_alu0);
11724 %}
11725 
11726 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11727 %{
11728   predicate(!UseAPX);
11729   match(Set dst (MulL dst (LoadL src)));
11730   effect(KILL cr);
11731 
11732   ins_cost(350);
11733   format %{ "imulq   $dst, $src\t# long" %}
11734   ins_encode %{
11735     __ imulq($dst$$Register, $src$$Address);
11736   %}
11737   ins_pipe(ialu_reg_mem_alu0);
11738 %}
11739 
11740 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11741 %{
11742   predicate(UseAPX);
11743   match(Set dst (MulL src1 (LoadL src2)));
11744   effect(KILL cr);
11745   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11746 
11747   ins_cost(350);
11748   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11749   ins_encode %{
11750     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11751   %}
11752   ins_pipe(ialu_reg_mem_alu0);
11753 %}
11754 
11755 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11756 %{
11757   match(Set dst (MulL (LoadL src) imm));
11758   effect(KILL cr);
11759 
11760   ins_cost(300);
11761   format %{ "imulq   $dst, $src, $imm\t# long" %}
11762   ins_encode %{
11763     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11764   %}
11765   ins_pipe(ialu_reg_mem_alu0);
11766 %}
11767 
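// One-operand imulq/mulq leave the full 128-bit product in RDX:RAX; these
// rules bind dst to RDX to pick up the signed/unsigned high half.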
11768 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11769 %{
11770   match(Set dst (MulHiL src rax));
11771   effect(USE_KILL rax, KILL cr);
11772 
11773   ins_cost(300);
11774   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11775   ins_encode %{
11776     __ imulq($src$$Register);
11777   %}
11778   ins_pipe(ialu_reg_reg_alu0);
11779 %}
11780 
11781 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11782 %{
11783   match(Set dst (UMulHiL src rax));
11784   effect(USE_KILL rax, KILL cr);
11785 
11786   ins_cost(300);
11787   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11788   ins_encode %{
11789     __ mulq($src$$Register);
11790   %}
11791   ins_pipe(ialu_reg_reg_alu0);
11792 %}
11793 
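// The signed divide/modulo encodings (cdql_enc/cdqq_enc) first test for the
// one overflowing case, MIN_VALUE / -1, which would raise #DE in idiv, and
// produce quotient MIN_VALUE with remainder 0 without executing the divide.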
11794 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11795                    rFlagsReg cr)
11796 %{
11797   match(Set rax (DivI rax div));
11798   effect(KILL rdx, KILL cr);
11799 
11800   ins_cost(30*100+10*100); // XXX
11801   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11802             "jne,s   normal\n\t"
11803             "xorl    rdx, rdx\n\t"
11804             "cmpl    $div, -1\n\t"
11805             "je,s    done\n"
11806     "normal: cdql\n\t"
11807             "idivl   $div\n"
11808     "done:"        %}
11809   ins_encode(cdql_enc(div));
11810   ins_pipe(ialu_reg_reg_alu0);
11811 %}
11812 
11813 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11814                    rFlagsReg cr)
11815 %{
11816   match(Set rax (DivL rax div));
11817   effect(KILL rdx, KILL cr);
11818 
11819   ins_cost(30*100+10*100); // XXX
11820   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11821             "cmpq    rax, rdx\n\t"
11822             "jne,s   normal\n\t"
11823             "xorl    rdx, rdx\n\t"
11824             "cmpq    $div, -1\n\t"
11825             "je,s    done\n"
11826     "normal: cdqq\n\t"
11827             "idivq   $div\n"
11828     "done:"        %}
11829   ins_encode(cdqq_enc(div));
11830   ins_pipe(ialu_reg_reg_alu0);
11831 %}
11832 
11833 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11834 %{
11835   match(Set rax (UDivI rax div));
11836   effect(KILL rdx, KILL cr);
11837 
11838   ins_cost(300);
11839   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11840   ins_encode %{
11841     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11842   %}
11843   ins_pipe(ialu_reg_reg_alu0);
11844 %}
11845 
11846 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11847 %{
11848   match(Set rax (UDivL rax div));
11849   effect(KILL rdx, KILL cr);
11850 
11851   ins_cost(300);
11852   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11853   ins_encode %{
11854      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11855   %}
11856   ins_pipe(ialu_reg_reg_alu0);
11857 %}
11858 
11859 // Integer DIVMOD with Register, both quotient and mod results
11860 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11861                              rFlagsReg cr)
11862 %{
11863   match(DivModI rax div);
11864   effect(KILL cr);
11865 
11866   ins_cost(30*100+10*100); // XXX
11867   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11868             "jne,s   normal\n\t"
11869             "xorl    rdx, rdx\n\t"
11870             "cmpl    $div, -1\n\t"
11871             "je,s    done\n"
11872     "normal: cdql\n\t"
11873             "idivl   $div\n"
11874     "done:"        %}
11875   ins_encode(cdql_enc(div));
11876   ins_pipe(pipe_slow);
11877 %}
11878 
11879 // Long DIVMOD with Register, both quotient and mod results
11880 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11881                              rFlagsReg cr)
11882 %{
11883   match(DivModL rax div);
11884   effect(KILL cr);
11885 
11886   ins_cost(30*100+10*100); // XXX
11887   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11888             "cmpq    rax, rdx\n\t"
11889             "jne,s   normal\n\t"
11890             "xorl    rdx, rdx\n\t"
11891             "cmpq    $div, -1\n\t"
11892             "je,s    done\n"
11893     "normal: cdqq\n\t"
11894             "idivq   $div\n"
11895     "done:"        %}
11896   ins_encode(cdqq_enc(div));
11897   ins_pipe(pipe_slow);
11898 %}
11899 
11900 // Unsigned integer DIVMOD with Register, both quotient and mod results
11901 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11902                               no_rax_rdx_RegI div, rFlagsReg cr)
11903 %{
11904   match(UDivModI rax div);
11905   effect(TEMP tmp, KILL cr);
11906 
11907   ins_cost(300);
11908   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11909             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11910           %}
11911   ins_encode %{
11912     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11913   %}
11914   ins_pipe(pipe_slow);
11915 %}
11916 
11917 // Unsigned long DIVMOD with Register, both quotient and mod results
11918 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11919                               no_rax_rdx_RegL div, rFlagsReg cr)
11920 %{
11921   match(UDivModL rax div);
11922   effect(TEMP tmp, KILL cr);
11923 
11924   ins_cost(300);
11925   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11926             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11927           %}
11928   ins_encode %{
11929     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11930   %}
11931   ins_pipe(pipe_slow);
11932 %}
11933 
11934 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11935                    rFlagsReg cr)
11936 %{
11937   match(Set rdx (ModI rax div));
11938   effect(KILL rax, KILL cr);
11939 
11940   ins_cost(300); // XXX
11941   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11942             "jne,s   normal\n\t"
11943             "xorl    rdx, rdx\n\t"
11944             "cmpl    $div, -1\n\t"
11945             "je,s    done\n"
11946     "normal: cdql\n\t"
11947             "idivl   $div\n"
11948     "done:"        %}
11949   ins_encode(cdql_enc(div));
11950   ins_pipe(ialu_reg_reg_alu0);
11951 %}
11952 
11953 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11954                    rFlagsReg cr)
11955 %{
11956   match(Set rdx (ModL rax div));
11957   effect(KILL rax, KILL cr);
11958 
11959   ins_cost(300); // XXX
11960   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11961             "cmpq    rax, rdx\n\t"
11962             "jne,s   normal\n\t"
11963             "xorl    rdx, rdx\n\t"
11964             "cmpq    $div, -1\n\t"
11965             "je,s    done\n"
11966     "normal: cdqq\n\t"
11967             "idivq   $div\n"
11968     "done:"        %}
11969   ins_encode(cdqq_enc(div));
11970   ins_pipe(ialu_reg_reg_alu0);
11971 %}
11972 
11973 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11974 %{
11975   match(Set rdx (UModI rax div));
11976   effect(KILL rax, KILL cr);
11977 
11978   ins_cost(300);
11979   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11980   ins_encode %{
11981     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11982   %}
11983   ins_pipe(ialu_reg_reg_alu0);
11984 %}
11985 
11986 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11987 %{
11988   match(Set rdx (UModL rax div));
11989   effect(KILL rax, KILL cr);
11990 
11991   ins_cost(300);
11992   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11993   ins_encode %{
11994     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11995   %}
11996   ins_pipe(ialu_reg_reg_alu0);
11997 %}
11998 
11999 // Integer Shift Instructions
12000 // Shift Left by one, two, three
12001 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
12002 %{
12003   predicate(!UseAPX);
12004   match(Set dst (LShiftI dst shift));
12005   effect(KILL cr);
12006 
12007   format %{ "sall    $dst, $shift" %}
12008   ins_encode %{
12009     __ sall($dst$$Register, $shift$$constant);
12010   %}
12011   ins_pipe(ialu_reg);
12012 %}
12013 
12014 // Shift Left by one, two, three
12015 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
12016 %{
12017   predicate(UseAPX);
12018   match(Set dst (LShiftI src shift));
12019   effect(KILL cr);
12020   flag(PD::Flag_ndd_demotable_opr1);
12021 
12022   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
12023   ins_encode %{
12024     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12025   %}
12026   ins_pipe(ialu_reg);
12027 %}
12028 
12029 // Shift Left by 8-bit immediate
12030 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12031 %{
12032   predicate(!UseAPX);
12033   match(Set dst (LShiftI dst shift));
12034   effect(KILL cr);
12035 
12036   format %{ "sall    $dst, $shift" %}
12037   ins_encode %{
12038     __ sall($dst$$Register, $shift$$constant);
12039   %}
12040   ins_pipe(ialu_reg);
12041 %}
12042 
12043 // Shift Left by 8-bit immediate
12044 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12045 %{
12046   predicate(UseAPX);
12047   match(Set dst (LShiftI src shift));
12048   effect(KILL cr);
12049   flag(PD::Flag_ndd_demotable_opr1);
12050 
12051   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
12052   ins_encode %{
12053     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12054   %}
12055   ins_pipe(ialu_reg);
12056 %}
12057 
12058 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12059 %{
12060   predicate(UseAPX);
12061   match(Set dst (LShiftI (LoadI src) shift));
12062   effect(KILL cr);
12063 
12064   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
12065   ins_encode %{
12066     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
12067   %}
12068   ins_pipe(ialu_reg);
12069 %}
12070 
12071 // Shift Left by 8-bit immediate
12072 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12073 %{
12074   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12075   effect(KILL cr);
12076 
12077   format %{ "sall    $dst, $shift" %}
12078   ins_encode %{
12079     __ sall($dst$$Address, $shift$$constant);
12080   %}
12081   ins_pipe(ialu_mem_imm);
12082 %}
12083 
12084 // Shift Left by variable
12085 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12086 %{
12087   predicate(!VM_Version::supports_bmi2());
12088   match(Set dst (LShiftI dst shift));
12089   effect(KILL cr);
12090 
12091   format %{ "sall    $dst, $shift" %}
12092   ins_encode %{
12093     __ sall($dst$$Register);
12094   %}
12095   ins_pipe(ialu_reg_reg);
12096 %}
12097 
12098 // Shift Left by variable
12099 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12100 %{
12101   predicate(!VM_Version::supports_bmi2());
12102   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12103   effect(KILL cr);
12104 
12105   format %{ "sall    $dst, $shift" %}
12106   ins_encode %{
12107     __ sall($dst$$Address);
12108   %}
12109   ins_pipe(ialu_mem_reg);
12110 %}
12111 
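// BMI2 shlx/sarx/shrx take the shift count in any register and do not modify
// the flags, so these forms need neither rcx nor a KILL of rFlagsReg.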
12112 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12113 %{
12114   predicate(VM_Version::supports_bmi2());
12115   match(Set dst (LShiftI src shift));
12116 
12117   format %{ "shlxl   $dst, $src, $shift" %}
12118   ins_encode %{
12119     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12120   %}
12121   ins_pipe(ialu_reg_reg);
12122 %}
12123 
12124 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12125 %{
12126   predicate(VM_Version::supports_bmi2());
12127   match(Set dst (LShiftI (LoadI src) shift));
12128   ins_cost(175);
12129   format %{ "shlxl   $dst, $src, $shift" %}
12130   ins_encode %{
12131     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12132   %}
12133   ins_pipe(ialu_reg_mem);
12134 %}
12135 
12136 // Arithmetic Shift Right by 8-bit immediate
12137 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12138 %{
12139   predicate(!UseAPX);
12140   match(Set dst (RShiftI dst shift));
12141   effect(KILL cr);
12142 
12143   format %{ "sarl    $dst, $shift" %}
12144   ins_encode %{
12145     __ sarl($dst$$Register, $shift$$constant);
12146   %}
12147   ins_pipe(ialu_mem_imm);
12148 %}
12149 
12150 // Arithmetic Shift Right by 8-bit immediate
12151 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12152 %{
12153   predicate(UseAPX);
12154   match(Set dst (RShiftI src shift));
12155   effect(KILL cr);
12156   flag(PD::Flag_ndd_demotable_opr1);
12157 
12158   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12159   ins_encode %{
12160     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12161   %}
12162   ins_pipe(ialu_mem_imm);
12163 %}
12164 
12165 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12166 %{
12167   predicate(UseAPX);
12168   match(Set dst (RShiftI (LoadI src) shift));
12169   effect(KILL cr);
12170 
12171   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12172   ins_encode %{
12173     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12174   %}
12175   ins_pipe(ialu_mem_imm);
12176 %}
12177 
12178 // Arithmetic Shift Right by 8-bit immediate
12179 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12180 %{
12181   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12182   effect(KILL cr);
12183 
12184   format %{ "sarl    $dst, $shift" %}
12185   ins_encode %{
12186     __ sarl($dst$$Address, $shift$$constant);
12187   %}
12188   ins_pipe(ialu_mem_imm);
12189 %}
12190 
12191 // Arithmetic Shift Right by variable
12192 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12193 %{
12194   predicate(!VM_Version::supports_bmi2());
12195   match(Set dst (RShiftI dst shift));
12196   effect(KILL cr);
12197 
12198   format %{ "sarl    $dst, $shift" %}
12199   ins_encode %{
12200     __ sarl($dst$$Register);
12201   %}
12202   ins_pipe(ialu_reg_reg);
12203 %}
12204 
12205 // Arithmetic Shift Right by variable
12206 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12207 %{
12208   predicate(!VM_Version::supports_bmi2());
12209   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12210   effect(KILL cr);
12211 
12212   format %{ "sarl    $dst, $shift" %}
12213   ins_encode %{
12214     __ sarl($dst$$Address);
12215   %}
12216   ins_pipe(ialu_mem_reg);
12217 %}
12218 
12219 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12220 %{
12221   predicate(VM_Version::supports_bmi2());
12222   match(Set dst (RShiftI src shift));
12223 
12224   format %{ "sarxl   $dst, $src, $shift" %}
12225   ins_encode %{
12226     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12227   %}
12228   ins_pipe(ialu_reg_reg);
12229 %}
12230 
12231 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12232 %{
12233   predicate(VM_Version::supports_bmi2());
12234   match(Set dst (RShiftI (LoadI src) shift));
12235   ins_cost(175);
12236   format %{ "sarxl   $dst, $src, $shift" %}
12237   ins_encode %{
12238     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12239   %}
12240   ins_pipe(ialu_reg_mem);
12241 %}
12242 
12243 // Logical Shift Right by 8-bit immediate
12244 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12245 %{
12246   predicate(!UseAPX);
12247   match(Set dst (URShiftI dst shift));
12248   effect(KILL cr);
12249 
12250   format %{ "shrl    $dst, $shift" %}
12251   ins_encode %{
12252     __ shrl($dst$$Register, $shift$$constant);
12253   %}
12254   ins_pipe(ialu_reg);
12255 %}
12256 
12257 // Logical Shift Right by 8-bit immediate
12258 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12259 %{
12260   predicate(UseAPX);
12261   match(Set dst (URShiftI src shift));
12262   effect(KILL cr);
12263   flag(PD::Flag_ndd_demotable_opr1);
12264 
12265   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12266   ins_encode %{
12267     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12268   %}
12269   ins_pipe(ialu_reg);
12270 %}
12271 
12272 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12273 %{
12274   predicate(UseAPX);
12275   match(Set dst (URShiftI (LoadI src) shift));
12276   effect(KILL cr);
12277 
12278   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12279   ins_encode %{
12280     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12281   %}
12282   ins_pipe(ialu_reg);
12283 %}
12284 
12285 // Logical Shift Right by 8-bit immediate
12286 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12287 %{
12288   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12289   effect(KILL cr);
12290 
12291   format %{ "shrl    $dst, $shift" %}
12292   ins_encode %{
12293     __ shrl($dst$$Address, $shift$$constant);
12294   %}
12295   ins_pipe(ialu_mem_imm);
12296 %}
12297 
12298 // Logical Shift Right by variable
12299 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12300 %{
12301   predicate(!VM_Version::supports_bmi2());
12302   match(Set dst (URShiftI dst shift));
12303   effect(KILL cr);
12304 
12305   format %{ "shrl    $dst, $shift" %}
12306   ins_encode %{
12307     __ shrl($dst$$Register);
12308   %}
12309   ins_pipe(ialu_reg_reg);
12310 %}
12311 
12312 // Logical Shift Right by variable
12313 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12314 %{
12315   predicate(!VM_Version::supports_bmi2());
12316   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12317   effect(KILL cr);
12318 
12319   format %{ "shrl    $dst, $shift" %}
12320   ins_encode %{
12321     __ shrl($dst$$Address);
12322   %}
12323   ins_pipe(ialu_mem_reg);
12324 %}
12325 
12326 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12327 %{
12328   predicate(VM_Version::supports_bmi2());
12329   match(Set dst (URShiftI src shift));
12330 
12331   format %{ "shrxl   $dst, $src, $shift" %}
12332   ins_encode %{
12333     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12334   %}
12335   ins_pipe(ialu_reg_reg);
12336 %}
12337 
12338 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12339 %{
12340   predicate(VM_Version::supports_bmi2());
12341   match(Set dst (URShiftI (LoadI src) shift));
12342   ins_cost(175);
12343   format %{ "shrxl   $dst, $src, $shift" %}
12344   ins_encode %{
12345     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12346   %}
12347   ins_pipe(ialu_reg_mem);
12348 %}
12349 
12350 // Long Shift Instructions
12351 // Shift Left by one, two, three
12352 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12353 %{
12354   predicate(!UseAPX);
12355   match(Set dst (LShiftL dst shift));
12356   effect(KILL cr);
12357 
12358   format %{ "salq    $dst, $shift" %}
12359   ins_encode %{
12360     __ salq($dst$$Register, $shift$$constant);
12361   %}
12362   ins_pipe(ialu_reg);
12363 %}
12364 
12365 // Shift Left by one, two, three
12366 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12367 %{
12368   predicate(UseAPX);
12369   match(Set dst (LShiftL src shift));
12370   effect(KILL cr);
12371   flag(PD::Flag_ndd_demotable_opr1);
12372 
12373   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12374   ins_encode %{
12375     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12376   %}
12377   ins_pipe(ialu_reg);
12378 %}
12379 
12380 // Shift Left by 8-bit immediate
12381 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12382 %{
12383   predicate(!UseAPX);
12384   match(Set dst (LShiftL dst shift));
12385   effect(KILL cr);
12386 
12387   format %{ "salq    $dst, $shift" %}
12388   ins_encode %{
12389     __ salq($dst$$Register, $shift$$constant);
12390   %}
12391   ins_pipe(ialu_reg);
12392 %}
12393 
12394 // Shift Left by 8-bit immediate
12395 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12396 %{
12397   predicate(UseAPX);
12398   match(Set dst (LShiftL src shift));
12399   effect(KILL cr);
12400   flag(PD::Flag_ndd_demotable_opr1);
12401 
12402   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12403   ins_encode %{
12404     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12405   %}
12406   ins_pipe(ialu_reg);
12407 %}
12408 
12409 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12410 %{
12411   predicate(UseAPX);
12412   match(Set dst (LShiftL (LoadL src) shift));
12413   effect(KILL cr);
12414 
12415   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12416   ins_encode %{
12417     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12418   %}
12419   ins_pipe(ialu_reg);
12420 %}
12421 
12422 // Shift Left by 8-bit immediate
12423 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12424 %{
12425   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12426   effect(KILL cr);
12427 
12428   format %{ "salq    $dst, $shift" %}
12429   ins_encode %{
12430     __ salq($dst$$Address, $shift$$constant);
12431   %}
12432   ins_pipe(ialu_mem_imm);
12433 %}
12434 
12435 // Shift Left by variable
12436 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12437 %{
12438   predicate(!VM_Version::supports_bmi2());
12439   match(Set dst (LShiftL dst shift));
12440   effect(KILL cr);
12441 
12442   format %{ "salq    $dst, $shift" %}
12443   ins_encode %{
12444     __ salq($dst$$Register);
12445   %}
12446   ins_pipe(ialu_reg_reg);
12447 %}
12448 
12449 // Shift Left by variable
12450 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12451 %{
12452   predicate(!VM_Version::supports_bmi2());
12453   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12454   effect(KILL cr);
12455 
12456   format %{ "salq    $dst, $shift" %}
12457   ins_encode %{
12458     __ salq($dst$$Address);
12459   %}
12460   ins_pipe(ialu_mem_reg);
12461 %}
12462 
12463 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12464 %{
12465   predicate(VM_Version::supports_bmi2());
12466   match(Set dst (LShiftL src shift));
12467 
12468   format %{ "shlxq   $dst, $src, $shift" %}
12469   ins_encode %{
12470     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12471   %}
12472   ins_pipe(ialu_reg_reg);
12473 %}
12474 
12475 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12476 %{
12477   predicate(VM_Version::supports_bmi2());
12478   match(Set dst (LShiftL (LoadL src) shift));
12479   ins_cost(175);
12480   format %{ "shlxq   $dst, $src, $shift" %}
12481   ins_encode %{
12482     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12483   %}
12484   ins_pipe(ialu_reg_mem);
12485 %}
12486 
12487 // Arithmetic Shift Right by immediate (count is masked to 6 bits)
12488 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12489 %{
12490   predicate(!UseAPX);
12491   match(Set dst (RShiftL dst shift));
12492   effect(KILL cr);
12493 
12494   format %{ "sarq    $dst, $shift" %}
12495   ins_encode %{
12496     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12497   %}
12498   ins_pipe(ialu_mem_imm);
12499 %}
12500 
12501 // Arithmetic Shift Right by immediate (count is masked to 6 bits)
12502 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12503 %{
12504   predicate(UseAPX);
12505   match(Set dst (RShiftL src shift));
12506   effect(KILL cr);
12507   flag(PD::Flag_ndd_demotable_opr1);
12508 
12509   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12510   ins_encode %{
12511     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12512   %}
12513   ins_pipe(ialu_mem_imm);
12514 %}
12515 
12516 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12517 %{
12518   predicate(UseAPX);
12519   match(Set dst (RShiftL (LoadL src) shift));
12520   effect(KILL cr);
12521 
12522   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12523   ins_encode %{
12524     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12525   %}
12526   ins_pipe(ialu_mem_imm);
12527 %}
12528 
12529 // Arithmetic Shift Right by immediate (count is masked to 6 bits)
12530 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12531 %{
12532   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12533   effect(KILL cr);
12534 
12535   format %{ "sarq    $dst, $shift" %}
12536   ins_encode %{
12537     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12538   %}
12539   ins_pipe(ialu_mem_imm);
12540 %}
12541 
12542 // Arithmetic Shift Right by variable
12543 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12544 %{
12545   predicate(!VM_Version::supports_bmi2());
12546   match(Set dst (RShiftL dst shift));
12547   effect(KILL cr);
12548 
12549   format %{ "sarq    $dst, $shift" %}
12550   ins_encode %{
12551     __ sarq($dst$$Register);
12552   %}
12553   ins_pipe(ialu_reg_reg);
12554 %}
12555 
12556 // Arithmetic Shift Right by variable
12557 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12558 %{
12559   predicate(!VM_Version::supports_bmi2());
12560   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12561   effect(KILL cr);
12562 
12563   format %{ "sarq    $dst, $shift" %}
12564   ins_encode %{
12565     __ sarq($dst$$Address);
12566   %}
12567   ins_pipe(ialu_mem_reg);
12568 %}
12569 
12570 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12571 %{
12572   predicate(VM_Version::supports_bmi2());
12573   match(Set dst (RShiftL src shift));
12574 
12575   format %{ "sarxq   $dst, $src, $shift" %}
12576   ins_encode %{
12577     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12578   %}
12579   ins_pipe(ialu_reg_reg);
12580 %}
12581 
12582 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12583 %{
12584   predicate(VM_Version::supports_bmi2());
12585   match(Set dst (RShiftL (LoadL src) shift));
12586   ins_cost(175);
12587   format %{ "sarxq   $dst, $src, $shift" %}
12588   ins_encode %{
12589     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12590   %}
12591   ins_pipe(ialu_reg_mem);
12592 %}
12593 
12594 // Logical Shift Right by 8-bit immediate
12595 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12596 %{
12597   predicate(!UseAPX);
12598   match(Set dst (URShiftL dst shift));
12599   effect(KILL cr);
12600 
12601   format %{ "shrq    $dst, $shift" %}
12602   ins_encode %{
12603     __ shrq($dst$$Register, $shift$$constant);
12604   %}
12605   ins_pipe(ialu_reg);
12606 %}
12607 
12608 // Logical Shift Right by 8-bit immediate
12609 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12610 %{
12611   predicate(UseAPX);
12612   match(Set dst (URShiftL src shift));
12613   effect(KILL cr);
12614   flag(PD::Flag_ndd_demotable_opr1);
12615 
12616   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12617   ins_encode %{
12618     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12619   %}
12620   ins_pipe(ialu_reg);
12621 %}
12622 
12623 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12624 %{
12625   predicate(UseAPX);
12626   match(Set dst (URShiftL (LoadL src) shift));
12627   effect(KILL cr);
12628 
12629   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12630   ins_encode %{
12631     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12632   %}
12633   ins_pipe(ialu_reg);
12634 %}
12635 
12636 // Logical Shift Right by 8-bit immediate
12637 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12638 %{
12639   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12640   effect(KILL cr);
12641 
12642   format %{ "shrq    $dst, $shift" %}
12643   ins_encode %{
12644     __ shrq($dst$$Address, $shift$$constant);
12645   %}
12646   ins_pipe(ialu_mem_imm);
12647 %}
12648 
12649 // Logical Shift Right by variable
12650 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12651 %{
12652   predicate(!VM_Version::supports_bmi2());
12653   match(Set dst (URShiftL dst shift));
12654   effect(KILL cr);
12655 
12656   format %{ "shrq    $dst, $shift" %}
12657   ins_encode %{
12658     __ shrq($dst$$Register);
12659   %}
12660   ins_pipe(ialu_reg_reg);
12661 %}
12662 
12663 // Logical Shift Right by variable
12664 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12665 %{
12666   predicate(!VM_Version::supports_bmi2());
12667   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12668   effect(KILL cr);
12669 
12670   format %{ "shrq    $dst, $shift" %}
12671   ins_encode %{
12672     __ shrq($dst$$Address);
12673   %}
12674   ins_pipe(ialu_mem_reg);
12675 %}
12676 
12677 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12678 %{
12679   predicate(VM_Version::supports_bmi2());
12680   match(Set dst (URShiftL src shift));
12681 
12682   format %{ "shrxq   $dst, $src, $shift" %}
12683   ins_encode %{
12684     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12685   %}
12686   ins_pipe(ialu_reg_reg);
12687 %}
12688 
12689 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12690 %{
12691   predicate(VM_Version::supports_bmi2());
12692   match(Set dst (URShiftL (LoadL src) shift));
12693   ins_cost(175);
12694   format %{ "shrxq   $dst, $src, $shift" %}
12695   ins_encode %{
12696     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12697   %}
12698   ins_pipe(ialu_reg_mem);
12699 %}
12700 
12701 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12702 // This idiom is used by the compiler for the i2b bytecode.
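// For example (illustrative), the Java cast "byte b = (byte) i" reaches the
// matcher as (i << 24) >> 24; this rule collapses the two shifts into a single
// movsbl sign extension.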
12703 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12704 %{
12705   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12706 
12707   format %{ "movsbl  $dst, $src\t# i2b" %}
12708   ins_encode %{
12709     __ movsbl($dst$$Register, $src$$Register);
12710   %}
12711   ins_pipe(ialu_reg_reg);
12712 %}
12713 
12714 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12715 // This idiom is used by the compiler for the i2s bytecode.
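// For example (illustrative), the Java cast "short s = (short) i" reaches the
// matcher as (i << 16) >> 16, which becomes a single movswl sign extension.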
12716 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12717 %{
12718   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12719 
12720   format %{ "movswl  $dst, $src\t# i2s" %}
12721   ins_encode %{
12722     __ movswl($dst$$Register, $src$$Register);
12723   %}
12724   ins_pipe(ialu_reg_reg);
12725 %}
12726 
12727 // ROL/ROR instructions
12728 
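// The rotate rules below are selected purely on the RotateLeft/RotateRight
// ideal node and its basic type (T_INT vs T_LONG). Constant rotates prefer the
// flag-preserving three-operand rorx form when BMI2 is available, while
// variable rotates use the classic cl-count forms (or the APX NDD forms when
// UseAPX is set).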
12729 // Rotate left by constant.
12730 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12731 %{
12732   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12733   match(Set dst (RotateLeft dst shift));
12734   effect(KILL cr);
12735   format %{ "roll    $dst, $shift" %}
12736   ins_encode %{
12737     __ roll($dst$$Register, $shift$$constant);
12738   %}
12739   ins_pipe(ialu_reg);
12740 %}
12741 
12742 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12743 %{
12744   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12745   match(Set dst (RotateLeft src shift));
12746   format %{ "rolxl   $dst, $src, $shift" %}
12747   ins_encode %{
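    // Rotate-left by n is emitted as rorx by (32 - n): rorx has an explicit
    // destination and does not modify the flags, so no KILL cr is needed here.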
12748     int shift = 32 - ($shift$$constant & 31);
12749     __ rorxl($dst$$Register, $src$$Register, shift);
12750   %}
12751   ins_pipe(ialu_reg_reg);
12752 %}
12753 
12754 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12755 %{
12756   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12757   match(Set dst (RotateLeft (LoadI src) shift));
12758   ins_cost(175);
12759   format %{ "rolxl   $dst, $src, $shift" %}
12760   ins_encode %{
12761     int shift = 32 - ($shift$$constant & 31);
12762     __ rorxl($dst$$Register, $src$$Address, shift);
12763   %}
12764   ins_pipe(ialu_reg_mem);
12765 %}
12766 
12767 // Rotate Left by variable
12768 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12769 %{
12770   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12771   match(Set dst (RotateLeft dst shift));
12772   effect(KILL cr);
12773   format %{ "roll    $dst, $shift" %}
12774   ins_encode %{
12775     __ roll($dst$$Register);
12776   %}
12777   ins_pipe(ialu_reg_reg);
12778 %}
12779 
12780 // Rotate Left by variable
12781 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12782 %{
12783   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12784   match(Set dst (RotateLeft src shift));
12785   effect(KILL cr);
12786   flag(PD::Flag_ndd_demotable_opr1);
12787 
12788   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12789   ins_encode %{
12790     __ eroll($dst$$Register, $src$$Register, false);
12791   %}
12792   ins_pipe(ialu_reg_reg);
12793 %}
12794 
12795 // Rotate Right by constant.
12796 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12797 %{
12798   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12799   match(Set dst (RotateRight dst shift));
12800   effect(KILL cr);
12801   format %{ "rorl    $dst, $shift" %}
12802   ins_encode %{
12803     __ rorl($dst$$Register, $shift$$constant);
12804   %}
12805   ins_pipe(ialu_reg);
12806 %}
12807 
12808 // Rotate Right by constant.
12809 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12810 %{
12811   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12812   match(Set dst (RotateRight src shift));
12813   format %{ "rorxl   $dst, $src, $shift" %}
12814   ins_encode %{
12815     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12816   %}
12817   ins_pipe(ialu_reg_reg);
12818 %}
12819 
12820 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12821 %{
12822   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12823   match(Set dst (RotateRight (LoadI src) shift));
12824   ins_cost(175);
12825   format %{ "rorxl   $dst, $src, $shift" %}
12826   ins_encode %{
12827     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12828   %}
12829   ins_pipe(ialu_reg_mem);
12830 %}
12831 
12832 // Rotate Right by variable
12833 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12834 %{
12835   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12836   match(Set dst (RotateRight dst shift));
12837   effect(KILL cr);
12838   format %{ "rorl    $dst, $shift" %}
12839   ins_encode %{
12840     __ rorl($dst$$Register);
12841   %}
12842   ins_pipe(ialu_reg_reg);
12843 %}
12844 
12845 // Rotate Right by variable
12846 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12847 %{
12848   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12849   match(Set dst (RotateRight src shift));
12850   effect(KILL cr);
12851   flag(PD::Flag_ndd_demotable_opr1);
12852 
12853   format %{ "erorl    $dst, $src, $shift\t# rotate right (int ndd)" %}
12854   ins_encode %{
12855     __ erorl($dst$$Register, $src$$Register, false);
12856   %}
12857   ins_pipe(ialu_reg_reg);
12858 %}
12859 
12860 // Rotate Left by constant.
12861 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12862 %{
12863   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12864   match(Set dst (RotateLeft dst shift));
12865   effect(KILL cr);
12866   format %{ "rolq    $dst, $shift" %}
12867   ins_encode %{
12868     __ rolq($dst$$Register, $shift$$constant);
12869   %}
12870   ins_pipe(ialu_reg);
12871 %}
12872 
12873 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12874 %{
12875   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12876   match(Set dst (RotateLeft src shift));
12877   format %{ "rolxq   $dst, $src, $shift" %}
12878   ins_encode %{
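    // Same transformation as the int case: rotate-left by n becomes rorx by
    // (64 - n), keeping the flag-preserving three-operand BMI2 form.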
12879     int shift = 64 - ($shift$$constant & 63);
12880     __ rorxq($dst$$Register, $src$$Register, shift);
12881   %}
12882   ins_pipe(ialu_reg_reg);
12883 %}
12884 
12885 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12886 %{
12887   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12888   match(Set dst (RotateLeft (LoadL src) shift));
12889   ins_cost(175);
12890   format %{ "rolxq   $dst, $src, $shift" %}
12891   ins_encode %{
12892     int shift = 64 - ($shift$$constant & 63);
12893     __ rorxq($dst$$Register, $src$$Address, shift);
12894   %}
12895   ins_pipe(ialu_reg_mem);
12896 %}
12897 
12898 // Rotate Left by variable
12899 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12900 %{
12901   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12902   match(Set dst (RotateLeft dst shift));
12903   effect(KILL cr);
12904 
12905   format %{ "rolq    $dst, $shift" %}
12906   ins_encode %{
12907     __ rolq($dst$$Register);
12908   %}
12909   ins_pipe(ialu_reg_reg);
12910 %}
12911 
12912 // Rotate Left by variable
12913 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12914 %{
12915   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12916   match(Set dst (RotateLeft src shift));
12917   effect(KILL cr);
12918   flag(PD::Flag_ndd_demotable_opr1);
12919 
12920   format %{ "erolq    $dst, $src, $shift\t# rotate left (long ndd)" %}
12921   ins_encode %{
12922     __ erolq($dst$$Register, $src$$Register, false);
12923   %}
12924   ins_pipe(ialu_reg_reg);
12925 %}
12926 
12927 // Rotate Right by constant.
12928 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12929 %{
12930   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12931   match(Set dst (RotateRight dst shift));
12932   effect(KILL cr);
12933   format %{ "rorq    $dst, $shift" %}
12934   ins_encode %{
12935     __ rorq($dst$$Register, $shift$$constant);
12936   %}
12937   ins_pipe(ialu_reg);
12938 %}
12939 
12940 // Rotate Right by constant
12941 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12942 %{
12943   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12944   match(Set dst (RotateRight src shift));
12945   format %{ "rorxq   $dst, $src, $shift" %}
12946   ins_encode %{
12947     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12948   %}
12949   ins_pipe(ialu_reg_reg);
12950 %}
12951 
12952 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12953 %{
12954   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12955   match(Set dst (RotateRight (LoadL src) shift));
12956   ins_cost(175);
12957   format %{ "rorxq   $dst, $src, $shift" %}
12958   ins_encode %{
12959     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12960   %}
12961   ins_pipe(ialu_reg_mem);
12962 %}
12963 
12964 // Rotate Right by variable
12965 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12966 %{
12967   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12968   match(Set dst (RotateRight dst shift));
12969   effect(KILL cr);
12970   format %{ "rorq    $dst, $shift" %}
12971   ins_encode %{
12972     __ rorq($dst$$Register);
12973   %}
12974   ins_pipe(ialu_reg_reg);
12975 %}
12976 
12977 // Rotate Right by variable
12978 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12979 %{
12980   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12981   match(Set dst (RotateRight src shift));
12982   effect(KILL cr);
12983   flag(PD::Flag_ndd_demotable_opr1);
12984 
12985   format %{ "erorq    $dst, $src, $shift\t# rotate right (long ndd)" %}
12986   ins_encode %{
12987     __ erorq($dst$$Register, $src$$Register, false);
12988   %}
12989   ins_pipe(ialu_reg_reg);
12990 %}
12991 
12992 //----------------------------- CompressBits/ExpandBits ------------------------
12993 
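// CompressBits maps to pext (gather the src bits selected by the mask into the
// low-order bits of dst) and ExpandBits maps to pdep (scatter the low-order
// src bits into the mask positions). Illustrative example:
//   pext(src = 0b1101'0110, mask = 0b0101'0101) = 0b1110
// These nodes back the Long.compress/Long.expand style APIs (assumption; the
// rules below depend only on the ideal node and a long bottom type).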
12994 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12995   predicate(n->bottom_type()->isa_long());
12996   match(Set dst (CompressBits src mask));
12997   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12998   ins_encode %{
12999     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
13000   %}
13001   ins_pipe( pipe_slow );
13002 %}
13003 
13004 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
13005   predicate(n->bottom_type()->isa_long());
13006   match(Set dst (ExpandBits src mask));
13007   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
13008   ins_encode %{
13009     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
13010   %}
13011   ins_pipe( pipe_slow );
13012 %}
13013 
13014 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
13015   predicate(n->bottom_type()->isa_long());
13016   match(Set dst (CompressBits src (LoadL mask)));
13017   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
13018   ins_encode %{
13019     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
13020   %}
13021   ins_pipe( pipe_slow );
13022 %}
13023 
13024 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
13025   predicate(n->bottom_type()->isa_long());
13026   match(Set dst (ExpandBits src (LoadL mask)));
13027   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
13028   ins_encode %{
13029     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
13030   %}
13031   ins_pipe( pipe_slow );
13032 %}
13033 
13034 
13035 // Logical Instructions
13036 
13037 // Integer Logical Instructions
13038 
13039 // And Instructions
13040 // And Register with Register
13041 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13042 %{
13043   predicate(!UseAPX);
13044   match(Set dst (AndI dst src));
13045   effect(KILL cr);
13046   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13047 
13048   format %{ "andl    $dst, $src\t# int" %}
13049   ins_encode %{
13050     __ andl($dst$$Register, $src$$Register);
13051   %}
13052   ins_pipe(ialu_reg_reg);
13053 %}
13054 
13055 // And Register with Register using New Data Destination (NDD)
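// With Intel APX, the EVEX-encoded "e"-prefixed forms take a New Data
// Destination, so dst need not alias either source. The Flag_ndd_demotable_opr*
// hints mark which source operand may be demoted: when dst is allocated to the
// same register as that operand, the shorter legacy two-operand encoding can be
// used instead (best-effort summary; see the flag definitions for exact rules).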
13056 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13057 %{
13058   predicate(UseAPX);
13059   match(Set dst (AndI src1 src2));
13060   effect(KILL cr);
13061   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13062 
13063   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
13064   ins_encode %{
13065     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
13067   %}
13068   ins_pipe(ialu_reg_reg);
13069 %}
13070 
13071 // And Register with Immediate 255
13072 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
13073 %{
13074   match(Set dst (AndI src mask));
13075 
13076   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
13077   ins_encode %{
13078     __ movzbl($dst$$Register, $src$$Register);
13079   %}
13080   ins_pipe(ialu_reg);
13081 %}
13082 
13083 // And Register with Immediate 255 and promote to long
13084 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13085 %{
13086   match(Set dst (ConvI2L (AndI src mask)));
13087 
13088   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13089   ins_encode %{
13090     __ movzbl($dst$$Register, $src$$Register);
13091   %}
13092   ins_pipe(ialu_reg);
13093 %}
13094 
13095 // And Register with Immediate 65535
13096 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13097 %{
13098   match(Set dst (AndI src mask));
13099 
13100   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13101   ins_encode %{
13102     __ movzwl($dst$$Register, $src$$Register);
13103   %}
13104   ins_pipe(ialu_reg);
13105 %}
13106 
13107 // And Register with Immediate 65535 and promote to long
13108 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13109 %{
13110   match(Set dst (ConvI2L (AndI src mask)));
13111 
13112   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13113   ins_encode %{
13114     __ movzwl($dst$$Register, $src$$Register);
13115   %}
13116   ins_pipe(ialu_reg);
13117 %}
13118 
13119 // Can skip int2long conversions after AND with small bitmask
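// For example (illustrative), (long)(x & 0x3FFFFFFF) needs no separate sign
// extension: the mask is of the form 2^n - 1, so bzhi with n in a temp register
// both applies the mask and leaves all higher bits zero.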
13120 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13121 %{
13122   predicate(VM_Version::supports_bmi2());
13123   ins_cost(125);
13124   effect(TEMP tmp, KILL cr);
13125   match(Set dst (ConvI2L (AndI src mask)));
13126   format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13127   ins_encode %{
13128     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13129     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13130   %}
13131   ins_pipe(ialu_reg_reg);
13132 %}
13133 
13134 // And Register with Immediate
13135 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13136 %{
13137   predicate(!UseAPX);
13138   match(Set dst (AndI dst src));
13139   effect(KILL cr);
13140   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13141 
13142   format %{ "andl    $dst, $src\t# int" %}
13143   ins_encode %{
13144     __ andl($dst$$Register, $src$$constant);
13145   %}
13146   ins_pipe(ialu_reg);
13147 %}
13148 
13149 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13150 %{
13151   predicate(UseAPX);
13152   match(Set dst (AndI src1 src2));
13153   effect(KILL cr);
13154   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13155 
13156   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13157   ins_encode %{
13158     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13159   %}
13160   ins_pipe(ialu_reg);
13161 %}
13162 
13163 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13164 %{
13165   predicate(UseAPX);
13166   match(Set dst (AndI (LoadI src1) src2));
13167   effect(KILL cr);
13168   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13169 
13170   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13171   ins_encode %{
13172     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13173   %}
13174   ins_pipe(ialu_reg);
13175 %}
13176 
13177 // And Register with Memory
13178 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13179 %{
13180   predicate(!UseAPX);
13181   match(Set dst (AndI dst (LoadI src)));
13182   effect(KILL cr);
13183   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13184 
13185   ins_cost(150);
13186   format %{ "andl    $dst, $src\t# int" %}
13187   ins_encode %{
13188     __ andl($dst$$Register, $src$$Address);
13189   %}
13190   ins_pipe(ialu_reg_mem);
13191 %}
13192 
13193 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13194 %{
13195   predicate(UseAPX);
13196   match(Set dst (AndI src1 (LoadI src2)));
13197   effect(KILL cr);
13198   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13199 
13200   ins_cost(150);
13201   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13202   ins_encode %{
13203     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13204   %}
13205   ins_pipe(ialu_reg_mem);
13206 %}
13207 
13208 // And Memory with Register
13209 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13210 %{
13211   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13212   effect(KILL cr);
13213   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13214 
13215   ins_cost(150);
13216   format %{ "andb    $dst, $src\t# byte" %}
13217   ins_encode %{
13218     __ andb($dst$$Address, $src$$Register);
13219   %}
13220   ins_pipe(ialu_mem_reg);
13221 %}
13222 
13223 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13224 %{
13225   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13226   effect(KILL cr);
13227   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13228 
13229   ins_cost(150);
13230   format %{ "andl    $dst, $src\t# int" %}
13231   ins_encode %{
13232     __ andl($dst$$Address, $src$$Register);
13233   %}
13234   ins_pipe(ialu_mem_reg);
13235 %}
13236 
13237 // And Memory with Immediate
13238 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13239 %{
13240   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13241   effect(KILL cr);
13242   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13243 
13244   ins_cost(125);
13245   format %{ "andl    $dst, $src\t# int" %}
13246   ins_encode %{
13247     __ andl($dst$$Address, $src$$constant);
13248   %}
13249   ins_pipe(ialu_mem_imm);
13250 %}
13251 
13252 // BMI1 instructions
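// The patterns below recognize the usual bit-trick idioms and map them to
// single BMI1 instructions (identities shown for reference):
//   andn(x, y) = ~x & y
//   blsi(x)    =  x & -x       (isolate lowest set bit)
//   blsmsk(x)  =  x ^ (x - 1)  (mask up to and including lowest set bit)
//   blsr(x)    =  x & (x - 1)  (clear lowest set bit)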
13253 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13254   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13255   predicate(UseBMI1Instructions);
13256   effect(KILL cr);
13257   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13258 
13259   ins_cost(125);
13260   format %{ "andnl  $dst, $src1, $src2" %}
13261 
13262   ins_encode %{
13263     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13264   %}
13265   ins_pipe(ialu_reg_mem);
13266 %}
13267 
13268 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13269   match(Set dst (AndI (XorI src1 minus_1) src2));
13270   predicate(UseBMI1Instructions);
13271   effect(KILL cr);
13272   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13273 
13274   format %{ "andnl  $dst, $src1, $src2" %}
13275 
13276   ins_encode %{
13277     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13278   %}
13279   ins_pipe(ialu_reg);
13280 %}
13281 
13282 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13283   match(Set dst (AndI (SubI imm_zero src) src));
13284   predicate(UseBMI1Instructions);
13285   effect(KILL cr);
13286   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13287 
13288   format %{ "blsil  $dst, $src" %}
13289 
13290   ins_encode %{
13291     __ blsil($dst$$Register, $src$$Register);
13292   %}
13293   ins_pipe(ialu_reg);
13294 %}
13295 
13296 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13297   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13298   predicate(UseBMI1Instructions);
13299   effect(KILL cr);
13300   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13301 
13302   ins_cost(125);
13303   format %{ "blsil  $dst, $src" %}
13304 
13305   ins_encode %{
13306     __ blsil($dst$$Register, $src$$Address);
13307   %}
13308   ins_pipe(ialu_reg_mem);
13309 %}
13310 
13311 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13312 %{
13313   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13314   predicate(UseBMI1Instructions);
13315   effect(KILL cr);
13316   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13317 
13318   ins_cost(125);
13319   format %{ "blsmskl $dst, $src" %}
13320 
13321   ins_encode %{
13322     __ blsmskl($dst$$Register, $src$$Address);
13323   %}
13324   ins_pipe(ialu_reg_mem);
13325 %}
13326 
13327 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13328 %{
13329   match(Set dst (XorI (AddI src minus_1) src));
13330   predicate(UseBMI1Instructions);
13331   effect(KILL cr);
13332   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13333 
13334   format %{ "blsmskl $dst, $src" %}
13335 
13336   ins_encode %{
13337     __ blsmskl($dst$$Register, $src$$Register);
13338   %}
13339 
13340   ins_pipe(ialu_reg);
13341 %}
13342 
13343 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13344 %{
13345   match(Set dst (AndI (AddI src minus_1) src) );
13346   predicate(UseBMI1Instructions);
13347   effect(KILL cr);
13348   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13349 
13350   format %{ "blsrl  $dst, $src" %}
13351 
13352   ins_encode %{
13353     __ blsrl($dst$$Register, $src$$Register);
13354   %}
13355 
13356   ins_pipe(ialu_reg_mem);
13357 %}
13358 
13359 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13360 %{
13361   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13362   predicate(UseBMI1Instructions);
13363   effect(KILL cr);
13364   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13365 
13366   ins_cost(125);
13367   format %{ "blsrl  $dst, $src" %}
13368 
13369   ins_encode %{
13370     __ blsrl($dst$$Register, $src$$Address);
13371   %}
13372 
13373   ins_pipe(ialu_reg);
13374 %}
13375 
13376 // Or Instructions
13377 // Or Register with Register
13378 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13379 %{
13380   predicate(!UseAPX);
13381   match(Set dst (OrI dst src));
13382   effect(KILL cr);
13383   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13384 
13385   format %{ "orl     $dst, $src\t# int" %}
13386   ins_encode %{
13387     __ orl($dst$$Register, $src$$Register);
13388   %}
13389   ins_pipe(ialu_reg_reg);
13390 %}
13391 
13392 // Or Register with Register using New Data Destination (NDD)
13393 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13394 %{
13395   predicate(UseAPX);
13396   match(Set dst (OrI src1 src2));
13397   effect(KILL cr);
13398   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13399 
13400   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13401   ins_encode %{
13402     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13403   %}
13404   ins_pipe(ialu_reg_reg);
13405 %}
13406 
13407 // Or Register with Immediate
13408 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13409 %{
13410   predicate(!UseAPX);
13411   match(Set dst (OrI dst src));
13412   effect(KILL cr);
13413   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13414 
13415   format %{ "orl     $dst, $src\t# int" %}
13416   ins_encode %{
13417     __ orl($dst$$Register, $src$$constant);
13418   %}
13419   ins_pipe(ialu_reg);
13420 %}
13421 
13422 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13423 %{
13424   predicate(UseAPX);
13425   match(Set dst (OrI src1 src2));
13426   effect(KILL cr);
13427   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13428 
13429   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13430   ins_encode %{
13431     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13432   %}
13433   ins_pipe(ialu_reg);
13434 %}
13435 
13436 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13437 %{
13438   predicate(UseAPX);
13439   match(Set dst (OrI src1 src2));
13440   effect(KILL cr);
13441   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13442 
13443   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13444   ins_encode %{
13445     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13446   %}
13447   ins_pipe(ialu_reg);
13448 %}
13449 
13450 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13451 %{
13452   predicate(UseAPX);
13453   match(Set dst (OrI (LoadI src1) src2));
13454   effect(KILL cr);
13455   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13456 
13457   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13458   ins_encode %{
13459     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13460   %}
13461   ins_pipe(ialu_reg);
13462 %}
13463 
13464 // Or Register with Memory
13465 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13466 %{
13467   predicate(!UseAPX);
13468   match(Set dst (OrI dst (LoadI src)));
13469   effect(KILL cr);
13470   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13471 
13472   ins_cost(150);
13473   format %{ "orl     $dst, $src\t# int" %}
13474   ins_encode %{
13475     __ orl($dst$$Register, $src$$Address);
13476   %}
13477   ins_pipe(ialu_reg_mem);
13478 %}
13479 
13480 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13481 %{
13482   predicate(UseAPX);
13483   match(Set dst (OrI src1 (LoadI src2)));
13484   effect(KILL cr);
13485   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13486 
13487   ins_cost(150);
13488   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13489   ins_encode %{
13490     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13491   %}
13492   ins_pipe(ialu_reg_mem);
13493 %}
13494 
13495 // Or Memory with Register
13496 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13497 %{
13498   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13499   effect(KILL cr);
13500   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13501 
13502   ins_cost(150);
13503   format %{ "orb    $dst, $src\t# byte" %}
13504   ins_encode %{
13505     __ orb($dst$$Address, $src$$Register);
13506   %}
13507   ins_pipe(ialu_mem_reg);
13508 %}
13509 
13510 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13511 %{
13512   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13513   effect(KILL cr);
13514   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13515 
13516   ins_cost(150);
13517   format %{ "orl     $dst, $src\t# int" %}
13518   ins_encode %{
13519     __ orl($dst$$Address, $src$$Register);
13520   %}
13521   ins_pipe(ialu_mem_reg);
13522 %}
13523 
13524 // Or Memory with Immediate
13525 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13526 %{
13527   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13528   effect(KILL cr);
13529   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13530 
13531   ins_cost(125);
13532   format %{ "orl     $dst, $src\t# int" %}
13533   ins_encode %{
13534     __ orl($dst$$Address, $src$$constant);
13535   %}
13536   ins_pipe(ialu_mem_imm);
13537 %}
13538 
13539 // Xor Instructions
13540 // Xor Register with Register
13541 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13542 %{
13543   predicate(!UseAPX);
13544   match(Set dst (XorI dst src));
13545   effect(KILL cr);
13546   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13547 
13548   format %{ "xorl    $dst, $src\t# int" %}
13549   ins_encode %{
13550     __ xorl($dst$$Register, $src$$Register);
13551   %}
13552   ins_pipe(ialu_reg_reg);
13553 %}
13554 
13555 // Xor Register with Register using New Data Destination (NDD)
13556 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13557 %{
13558   predicate(UseAPX);
13559   match(Set dst (XorI src1 src2));
13560   effect(KILL cr);
13561   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13562 
13563   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13564   ins_encode %{
13565     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13566   %}
13567   ins_pipe(ialu_reg_reg);
13568 %}
13569 
13570 // Xor Register with Immediate -1
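// For example (illustrative), the ideal graph represents bitwise not (~x) as
// x ^ -1, so this rule emits a single not instead of an xor with an immediate.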
13571 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13572 %{
13573   predicate(!UseAPX);
13574   match(Set dst (XorI dst imm));
13575 
13576   format %{ "notl    $dst" %}
13577   ins_encode %{
13578      __ notl($dst$$Register);
13579   %}
13580   ins_pipe(ialu_reg);
13581 %}
13582 
13583 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13584 %{
13585   match(Set dst (XorI src imm));
13586   predicate(UseAPX);
13587   flag(PD::Flag_ndd_demotable_opr1);
13588 
13589   format %{ "enotl    $dst, $src" %}
13590   ins_encode %{
13591      __ enotl($dst$$Register, $src$$Register);
13592   %}
13593   ins_pipe(ialu_reg);
13594 %}
13595 
13596 // Xor Register with Immediate
13597 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13598 %{
13599   // The predicate excludes -1 so that xorI_rReg_im1 is always selected for that constant, independent of cost.
13600   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13601   match(Set dst (XorI dst src));
13602   effect(KILL cr);
13603   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13604 
13605   format %{ "xorl    $dst, $src\t# int" %}
13606   ins_encode %{
13607     __ xorl($dst$$Register, $src$$constant);
13608   %}
13609   ins_pipe(ialu_reg);
13610 %}
13611 
13612 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13613 %{
13614   // The predicate excludes -1 so that xorI_rReg_im1_ndd is always selected for that constant, independent of cost.
13615   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13616   match(Set dst (XorI src1 src2));
13617   effect(KILL cr);
13618   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13619 
13620   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13621   ins_encode %{
13622     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13623   %}
13624   ins_pipe(ialu_reg);
13625 %}
13626 
13627 // Xor Memory with Immediate
13628 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13629 %{
13630   predicate(UseAPX);
13631   match(Set dst (XorI (LoadI src1) src2));
13632   effect(KILL cr);
13633   ins_cost(150);
13634   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13635 
13636   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13637   ins_encode %{
13638     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13639   %}
13640   ins_pipe(ialu_reg);
13641 %}
13642 
13643 // Xor Register with Memory
13644 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13645 %{
13646   predicate(!UseAPX);
13647   match(Set dst (XorI dst (LoadI src)));
13648   effect(KILL cr);
13649   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13650 
13651   ins_cost(150);
13652   format %{ "xorl    $dst, $src\t# int" %}
13653   ins_encode %{
13654     __ xorl($dst$$Register, $src$$Address);
13655   %}
13656   ins_pipe(ialu_reg_mem);
13657 %}
13658 
13659 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13660 %{
13661   predicate(UseAPX);
13662   match(Set dst (XorI src1 (LoadI src2)));
13663   effect(KILL cr);
13664   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13665 
13666   ins_cost(150);
13667   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13668   ins_encode %{
13669     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13670   %}
13671   ins_pipe(ialu_reg_mem);
13672 %}
13673 
13674 // Xor Memory with Register
13675 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13676 %{
13677   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13678   effect(KILL cr);
13679   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13680 
13681   ins_cost(150);
13682   format %{ "xorb    $dst, $src\t# byte" %}
13683   ins_encode %{
13684     __ xorb($dst$$Address, $src$$Register);
13685   %}
13686   ins_pipe(ialu_mem_reg);
13687 %}
13688 
13689 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13690 %{
13691   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13692   effect(KILL cr);
13693   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13694 
13695   ins_cost(150);
13696   format %{ "xorl    $dst, $src\t# int" %}
13697   ins_encode %{
13698     __ xorl($dst$$Address, $src$$Register);
13699   %}
13700   ins_pipe(ialu_mem_reg);
13701 %}
13702 
13703 // Xor Memory with Immediate
13704 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13705 %{
13706   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13707   effect(KILL cr);
13708   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13709 
13710   ins_cost(125);
13711   format %{ "xorl    $dst, $src\t# int" %}
13712   ins_encode %{
13713     __ xorl($dst$$Address, $src$$constant);
13714   %}
13715   ins_pipe(ialu_mem_imm);
13716 %}
13717 
13718 
13719 // Long Logical Instructions
13720 
13721 // And Instructions
13722 // And Register with Register
13723 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13724 %{
13725   predicate(!UseAPX);
13726   match(Set dst (AndL dst src));
13727   effect(KILL cr);
13728   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13729 
13730   format %{ "andq    $dst, $src\t# long" %}
13731   ins_encode %{
13732     __ andq($dst$$Register, $src$$Register);
13733   %}
13734   ins_pipe(ialu_reg_reg);
13735 %}
13736 
13737 // And Register with Register using New Data Destination (NDD)
13738 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13739 %{
13740   predicate(UseAPX);
13741   match(Set dst (AndL src1 src2));
13742   effect(KILL cr);
13743   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13744 
13745   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13746   ins_encode %{
13747     __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13749   %}
13750   ins_pipe(ialu_reg_reg);
13751 %}
13752 
13753 // And Register with Immediate 255
13754 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13755 %{
13756   match(Set dst (AndL src mask));
13757 
13758   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13759   ins_encode %{
13760     // movzbl zeroes out the upper 32 bits and does not need REX.W
13761     __ movzbl($dst$$Register, $src$$Register);
13762   %}
13763   ins_pipe(ialu_reg);
13764 %}
13765 
13766 // And Register with Immediate 65535
13767 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13768 %{
13769   match(Set dst (AndL src mask));
13770 
13771   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13772   ins_encode %{
13773     // movzwl zeroes out the upper 32 bits and does not need REX.W
13774     __ movzwl($dst$$Register, $src$$Register);
13775   %}
13776   ins_pipe(ialu_reg);
13777 %}
13778 
13779 // And Register with Immediate
13780 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13781 %{
13782   predicate(!UseAPX);
13783   match(Set dst (AndL dst src));
13784   effect(KILL cr);
13785   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13786 
13787   format %{ "andq    $dst, $src\t# long" %}
13788   ins_encode %{
13789     __ andq($dst$$Register, $src$$constant);
13790   %}
13791   ins_pipe(ialu_reg);
13792 %}
13793 
13794 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13795 %{
13796   predicate(UseAPX);
13797   match(Set dst (AndL src1 src2));
13798   effect(KILL cr);
13799   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13800 
13801   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13802   ins_encode %{
13803     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13804   %}
13805   ins_pipe(ialu_reg);
13806 %}
13807 
13808 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13809 %{
13810   predicate(UseAPX);
13811   match(Set dst (AndL (LoadL src1) src2));
13812   effect(KILL cr);
13813   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13814 
13815   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13816   ins_encode %{
13817     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13818   %}
13819   ins_pipe(ialu_reg);
13820 %}
13821 
13822 // And Register with Memory
13823 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13824 %{
13825   predicate(!UseAPX);
13826   match(Set dst (AndL dst (LoadL src)));
13827   effect(KILL cr);
13828   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13829 
13830   ins_cost(150);
13831   format %{ "andq    $dst, $src\t# long" %}
13832   ins_encode %{
13833     __ andq($dst$$Register, $src$$Address);
13834   %}
13835   ins_pipe(ialu_reg_mem);
13836 %}
13837 
13838 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13839 %{
13840   predicate(UseAPX);
13841   match(Set dst (AndL src1 (LoadL src2)));
13842   effect(KILL cr);
13843   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13844 
13845   ins_cost(150);
13846   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13847   ins_encode %{
13848     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13849   %}
13850   ins_pipe(ialu_reg_mem);
13851 %}
13852 
13853 // And Memory with Register
13854 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13855 %{
13856   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13857   effect(KILL cr);
13858   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13859 
13860   ins_cost(150);
13861   format %{ "andq    $dst, $src\t# long" %}
13862   ins_encode %{
13863     __ andq($dst$$Address, $src$$Register);
13864   %}
13865   ins_pipe(ialu_mem_reg);
13866 %}
13867 
13868 // And Memory with Immediate
13869 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13870 %{
13871   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13872   effect(KILL cr);
13873   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13874 
13875   ins_cost(125);
13876   format %{ "andq    $dst, $src\t# long" %}
13877   ins_encode %{
13878     __ andq($dst$$Address, $src$$constant);
13879   %}
13880   ins_pipe(ialu_mem_imm);
13881 %}
13882 
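      // Clearing a single high bit: an AND with a constant whose complement is a power of 2
      // above bit 30 (too wide to encode as a sign-extended 32-bit immediate) is matched to
      // a btrq with the bit index.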
13883 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13884 %{
13885   // con should be a pure 64-bit immediate given that not(con) is a power of 2
13886   // because AND/OR works well enough for 8/32-bit values.
13887   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13888 
13889   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13890   effect(KILL cr);
13891 
13892   ins_cost(125);
13893   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13894   ins_encode %{
13895     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13896   %}
13897   ins_pipe(ialu_mem_imm);
13898 %}
13899 
13900 // BMI1 instructions
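      // The patterns below recognize the standard BMI1 bit-manipulation idioms:
      //   andn:   dst = ~src1 & src2
      //   blsi:   dst = src & -src        (isolate lowest set bit)
      //   blsmsk: dst = src ^ (src - 1)   (mask up to and including lowest set bit)
      //   blsr:   dst = src & (src - 1)   (clear lowest set bit)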
13901 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13902   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13903   predicate(UseBMI1Instructions);
13904   effect(KILL cr);
13905   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13906 
13907   ins_cost(125);
13908   format %{ "andnq  $dst, $src1, $src2" %}
13909 
13910   ins_encode %{
13911     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13912   %}
13913   ins_pipe(ialu_reg_mem);
13914 %}
13915 
13916 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13917   match(Set dst (AndL (XorL src1 minus_1) src2));
13918   predicate(UseBMI1Instructions);
13919   effect(KILL cr);
13920   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13921 
13922   format %{ "andnq  $dst, $src1, $src2" %}
13923 
13924   ins_encode %{
13925     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13926   %}
13927   ins_pipe(ialu_reg_mem);
13928 %}
13929 
13930 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13931   match(Set dst (AndL (SubL imm_zero src) src));
13932   predicate(UseBMI1Instructions);
13933   effect(KILL cr);
13934   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13935 
13936   format %{ "blsiq  $dst, $src" %}
13937 
13938   ins_encode %{
13939     __ blsiq($dst$$Register, $src$$Register);
13940   %}
13941   ins_pipe(ialu_reg);
13942 %}
13943 
13944 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13945   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13946   predicate(UseBMI1Instructions);
13947   effect(KILL cr);
13948   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13949 
13950   ins_cost(125);
13951   format %{ "blsiq  $dst, $src" %}
13952 
13953   ins_encode %{
13954     __ blsiq($dst$$Register, $src$$Address);
13955   %}
13956   ins_pipe(ialu_reg_mem);
13957 %}
13958 
13959 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13960 %{
13961   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13962   predicate(UseBMI1Instructions);
13963   effect(KILL cr);
13964   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13965 
13966   ins_cost(125);
13967   format %{ "blsmskq $dst, $src" %}
13968 
13969   ins_encode %{
13970     __ blsmskq($dst$$Register, $src$$Address);
13971   %}
13972   ins_pipe(ialu_reg_mem);
13973 %}
13974 
13975 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13976 %{
13977   match(Set dst (XorL (AddL src minus_1) src));
13978   predicate(UseBMI1Instructions);
13979   effect(KILL cr);
13980   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13981 
13982   format %{ "blsmskq $dst, $src" %}
13983 
13984   ins_encode %{
13985     __ blsmskq($dst$$Register, $src$$Register);
13986   %}
13987 
13988   ins_pipe(ialu_reg);
13989 %}
13990 
13991 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13992 %{
13993   match(Set dst (AndL (AddL src minus_1) src) );
13994   predicate(UseBMI1Instructions);
13995   effect(KILL cr);
13996   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13997 
13998   format %{ "blsrq  $dst, $src" %}
13999 
14000   ins_encode %{
14001     __ blsrq($dst$$Register, $src$$Register);
14002   %}
14003 
14004   ins_pipe(ialu_reg);
14005 %}
14006 
14007 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
14008 %{
14009   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
14010   predicate(UseBMI1Instructions);
14011   effect(KILL cr);
14012   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
14013 
14014   ins_cost(125);
14015   format %{ "blsrq  $dst, $src" %}
14016 
14017   ins_encode %{
14018     __ blsrq($dst$$Register, $src$$Address);
14019   %}
14020 
14021   ins_pipe(ialu_reg);
14022 %}
14023 
14024 // Or Instructions
14025 // Or Register with Register
14026 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14027 %{
14028   predicate(!UseAPX);
14029   match(Set dst (OrL dst src));
14030   effect(KILL cr);
14031   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14032 
14033   format %{ "orq     $dst, $src\t# long" %}
14034   ins_encode %{
14035     __ orq($dst$$Register, $src$$Register);
14036   %}
14037   ins_pipe(ialu_reg_reg);
14038 %}
14039 
14040 // Or Register with Register using New Data Destination (NDD)
14041 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14042 %{
14043   predicate(UseAPX);
14044   match(Set dst (OrL src1 src2));
14045   effect(KILL cr);
14046   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14047 
14048   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14049   ins_encode %{
14050     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14051 
14052   %}
14053   ins_pipe(ialu_reg_reg);
14054 %}
14055 
14056 // Use any_RegP to match R15 (TLS register) without spilling.
14057 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
14058   match(Set dst (OrL dst (CastP2X src)));
14059   effect(KILL cr);
14060   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14061 
14062   format %{ "orq     $dst, $src\t# long" %}
14063   ins_encode %{
14064     __ orq($dst$$Register, $src$$Register);
14065   %}
14066   ins_pipe(ialu_reg_reg);
14067 %}
14068 
14069 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
14070   match(Set dst (OrL src1 (CastP2X src2)));
14071   effect(KILL cr);
14072   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14073 
14074   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14075   ins_encode %{
14076     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14077   %}
14078   ins_pipe(ialu_reg_reg);
14079 %}
14080 
14081 // Or Register with Immediate
14082 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14083 %{
14084   predicate(!UseAPX);
14085   match(Set dst (OrL dst src));
14086   effect(KILL cr);
14087   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14088 
14089   format %{ "orq     $dst, $src\t# long" %}
14090   ins_encode %{
14091     __ orq($dst$$Register, $src$$constant);
14092   %}
14093   ins_pipe(ialu_reg);
14094 %}
14095 
14096 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14097 %{
14098   predicate(UseAPX);
14099   match(Set dst (OrL src1 src2));
14100   effect(KILL cr);
14101   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14102 
14103   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14104   ins_encode %{
14105     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14106   %}
14107   ins_pipe(ialu_reg);
14108 %}
14109 
14110 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14111 %{
14112   predicate(UseAPX);
14113   match(Set dst (OrL src1 src2));
14114   effect(KILL cr);
14115   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14116 
14117   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14118   ins_encode %{
14119     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14120   %}
14121   ins_pipe(ialu_reg);
14122 %}
14123 
14124 // Or Memory with Immediate
14125 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14126 %{
14127   predicate(UseAPX);
14128   match(Set dst (OrL (LoadL src1) src2));
14129   effect(KILL cr);
14130   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14131 
14132   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14133   ins_encode %{
14134     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14135   %}
14136   ins_pipe(ialu_reg);
14137 %}
14138 
14139 // Or Register with Memory
14140 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14141 %{
14142   predicate(!UseAPX);
14143   match(Set dst (OrL dst (LoadL src)));
14144   effect(KILL cr);
14145   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14146 
14147   ins_cost(150);
14148   format %{ "orq     $dst, $src\t# long" %}
14149   ins_encode %{
14150     __ orq($dst$$Register, $src$$Address);
14151   %}
14152   ins_pipe(ialu_reg_mem);
14153 %}
14154 
14155 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14156 %{
14157   predicate(UseAPX);
14158   match(Set dst (OrL src1 (LoadL src2)));
14159   effect(KILL cr);
14160   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14161 
14162   ins_cost(150);
14163   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14164   ins_encode %{
14165     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14166   %}
14167   ins_pipe(ialu_reg_mem);
14168 %}
14169 
14170 // Or Memory with Register
14171 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14172 %{
14173   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14174   effect(KILL cr);
14175   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14176 
14177   ins_cost(150);
14178   format %{ "orq     $dst, $src\t# long" %}
14179   ins_encode %{
14180     __ orq($dst$$Address, $src$$Register);
14181   %}
14182   ins_pipe(ialu_mem_reg);
14183 %}
14184 
14185 // Or Memory with Immediate
14186 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14187 %{
14188   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14189   effect(KILL cr);
14190   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14191 
14192   ins_cost(125);
14193   format %{ "orq     $dst, $src\t# long" %}
14194   ins_encode %{
14195     __ orq($dst$$Address, $src$$constant);
14196   %}
14197   ins_pipe(ialu_mem_imm);
14198 %}
14199 
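      // Setting a single high bit: an OR with a power-of-2 constant above bit 31 (too wide to
      // encode as a sign-extended 32-bit immediate) is matched to a btsq with the bit index.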
14200 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14201 %{
14202   // con should be a pure 64-bit power of 2 immediate
14203   // because AND/OR works well enough for 8/32-bit values.
14204   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14205 
14206   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14207   effect(KILL cr);
14208 
14209   ins_cost(125);
14210   format %{ "btsq    $dst, log2($con)\t# long" %}
14211   ins_encode %{
14212     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14213   %}
14214   ins_pipe(ialu_mem_imm);
14215 %}
14216 
14217 // Xor Instructions
14218 // Xor Register with Register
14219 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14220 %{
14221   predicate(!UseAPX);
14222   match(Set dst (XorL dst src));
14223   effect(KILL cr);
14224   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14225 
14226   format %{ "xorq    $dst, $src\t# long" %}
14227   ins_encode %{
14228     __ xorq($dst$$Register, $src$$Register);
14229   %}
14230   ins_pipe(ialu_reg_reg);
14231 %}
14232 
14233 // Xor Register with Register using New Data Destination (NDD)
14234 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14235 %{
14236   predicate(UseAPX);
14237   match(Set dst (XorL src1 src2));
14238   effect(KILL cr);
14239   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14240 
14241   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14242   ins_encode %{
14243     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14244   %}
14245   ins_pipe(ialu_reg_reg);
14246 %}
14247 
14248 // Xor Register with Immediate -1
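      // x ^ -1 is simply ~x; notq is shorter and, unlike xorq, leaves the flags untouched,
      // which is why there is no KILL cr effect here.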
14249 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14250 %{
14251   predicate(!UseAPX);
14252   match(Set dst (XorL dst imm));
14253 
14254   format %{ "notq   $dst" %}
14255   ins_encode %{
14256      __ notq($dst$$Register);
14257   %}
14258   ins_pipe(ialu_reg);
14259 %}
14260 
14261 instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14262 %{
14263   predicate(UseAPX);
14264   match(Set dst (XorL src imm));
14265   flag(PD::Flag_ndd_demotable_opr1);
14266 
14267   format %{ "enotq   $dst, $src" %}
14268   ins_encode %{
14269     __ enotq($dst$$Register, $src$$Register);
14270   %}
14271   ins_pipe(ialu_reg);
14272 %}
14273 
14274 // Xor Register with Immediate
14275 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14276 %{
14277   // Strict predicate check so that xorL_rReg_im1 is selected, regardless of cost, when the immL32 src is -1.
14278   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14279   match(Set dst (XorL dst src));
14280   effect(KILL cr);
14281   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14282 
14283   format %{ "xorq    $dst, $src\t# long" %}
14284   ins_encode %{
14285     __ xorq($dst$$Register, $src$$constant);
14286   %}
14287   ins_pipe(ialu_reg);
14288 %}
14289 
14290 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14291 %{
14292   // Strict predicate check so that xorL_rReg_im1_ndd is selected, regardless of cost, when the immL32 src2 is -1.
14293   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14294   match(Set dst (XorL src1 src2));
14295   effect(KILL cr);
14296   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14297 
14298   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14299   ins_encode %{
14300     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14301   %}
14302   ins_pipe(ialu_reg);
14303 %}
14304 
14305 // Xor Memory with Immediate
14306 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14307 %{
14308   predicate(UseAPX);
14309   match(Set dst (XorL (LoadL src1) src2));
14310   effect(KILL cr);
14311   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14312   ins_cost(150);
14313 
14314   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14315   ins_encode %{
14316     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14317   %}
14318   ins_pipe(ialu_reg);
14319 %}
14320 
14321 // Xor Register with Memory
14322 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14323 %{
14324   predicate(!UseAPX);
14325   match(Set dst (XorL dst (LoadL src)));
14326   effect(KILL cr);
14327   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14328 
14329   ins_cost(150);
14330   format %{ "xorq    $dst, $src\t# long" %}
14331   ins_encode %{
14332     __ xorq($dst$$Register, $src$$Address);
14333   %}
14334   ins_pipe(ialu_reg_mem);
14335 %}
14336 
14337 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14338 %{
14339   predicate(UseAPX);
14340   match(Set dst (XorL src1 (LoadL src2)));
14341   effect(KILL cr);
14342   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14343 
14344   ins_cost(150);
14345   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14346   ins_encode %{
14347     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14348   %}
14349   ins_pipe(ialu_reg_mem);
14350 %}
14351 
14352 // Xor Memory with Register
14353 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14354 %{
14355   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14356   effect(KILL cr);
14357   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14358 
14359   ins_cost(150);
14360   format %{ "xorq    $dst, $src\t# long" %}
14361   ins_encode %{
14362     __ xorq($dst$$Address, $src$$Register);
14363   %}
14364   ins_pipe(ialu_mem_reg);
14365 %}
14366 
14367 // Xor Memory with Immediate
14368 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14369 %{
14370   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14371   effect(KILL cr);
14372   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14373 
14374   ins_cost(125);
14375   format %{ "xorq    $dst, $src\t# long" %}
14376   ins_encode %{
14377     __ xorq($dst$$Address, $src$$constant);
14378   %}
14379   ins_pipe(ialu_mem_imm);
14380 %}
14381 
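      // CmpLTMask produces dst = (p < q) ? -1 : 0: the low byte is set from the 'less'
      // condition, zero-extended, and then negated.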
14382 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14383 %{
14384   match(Set dst (CmpLTMask p q));
14385   effect(KILL cr);
14386 
14387   ins_cost(400);
14388   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14389             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14390             "negl    $dst" %}
14391   ins_encode %{
14392     __ cmpl($p$$Register, $q$$Register);
14393     __ setcc(Assembler::less, $dst$$Register);
14394     __ negl($dst$$Register);
14395   %}
14396   ins_pipe(pipe_slow);
14397 %}
14398 
14399 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14400 %{
14401   match(Set dst (CmpLTMask dst zero));
14402   effect(KILL cr);
14403 
14404   ins_cost(100);
14405   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14406   ins_encode %{
14407     __ sarl($dst$$Register, 31);
14408   %}
14409   ins_pipe(ialu_reg);
14410 %}
14411 
14412 /* Better to save a register than avoid a branch */
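      // Computes p = (p < q) ? (p - q + y) : (p - q): the subtraction sets the flags and the
      // add of y is branched over when p >= q, so no mask register is needed.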
14413 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14414 %{
14415   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14416   effect(KILL cr);
14417   ins_cost(300);
14418   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14419             "jge     done\n\t"
14420             "addl    $p,$y\n"
14421             "done:   " %}
14422   ins_encode %{
14423     Register Rp = $p$$Register;
14424     Register Rq = $q$$Register;
14425     Register Ry = $y$$Register;
14426     Label done;
14427     __ subl(Rp, Rq);
14428     __ jccb(Assembler::greaterEqual, done);
14429     __ addl(Rp, Ry);
14430     __ bind(done);
14431   %}
14432   ins_pipe(pipe_cmplt);
14433 %}
14434 
14435 /* Better to save a register than avoid a branch */
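      // Computes y = (p < q) ? y : 0: branch over the clearing xor when p < q.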
14436 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14437 %{
14438   match(Set y (AndI (CmpLTMask p q) y));
14439   effect(KILL cr);
14440 
14441   ins_cost(300);
14442 
14443   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14444             "jlt     done\n\t"
14445             "xorl    $y, $y\n"
14446             "done:   " %}
14447   ins_encode %{
14448     Register Rp = $p$$Register;
14449     Register Rq = $q$$Register;
14450     Register Ry = $y$$Register;
14451     Label done;
14452     __ cmpl(Rp, Rq);
14453     __ jccb(Assembler::less, done);
14454     __ xorl(Ry, Ry);
14455     __ bind(done);
14456   %}
14457   ins_pipe(pipe_cmplt);
14458 %}
14459 
14460 
14461 //---------- FP Instructions------------------------------------------------
14462 
14463 // Really expensive, avoid
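      // ucomiss/ucomisd set ZF, PF and CF for unordered operands; emit_cmpfp_fixup()
      // rewrites the flags (via the pushfq/andq/popfq sequence shown) so that a NaN
      // result reads as "below".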
14464 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14465 %{
14466   match(Set cr (CmpF src1 src2));
14467 
14468   ins_cost(500);
14469   format %{ "ucomiss $src1, $src2\n\t"
14470             "jnp,s   exit\n\t"
14471             "pushfq\t# saw NaN, set CF\n\t"
14472             "andq    [rsp], #0xffffff2b\n\t"
14473             "popfq\n"
14474     "exit:" %}
14475   ins_encode %{
14476     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14477     emit_cmpfp_fixup(masm);
14478   %}
14479   ins_pipe(pipe_slow);
14480 %}
14481 
14482 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14483   match(Set cr (CmpF src1 src2));
14484 
14485   ins_cost(100);
14486   format %{ "ucomiss $src1, $src2" %}
14487   ins_encode %{
14488     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14489   %}
14490   ins_pipe(pipe_slow);
14491 %}
14492 
14493 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14494   match(Set cr (CmpF src1 src2));
14495 
14496   ins_cost(100);
14497   format %{ "evucomxss $src1, $src2" %}
14498   ins_encode %{
14499     __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14500   %}
14501   ins_pipe(pipe_slow);
14502 %}
14503 
14504 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14505   match(Set cr (CmpF src1 (LoadF src2)));
14506 
14507   ins_cost(100);
14508   format %{ "ucomiss $src1, $src2" %}
14509   ins_encode %{
14510     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14511   %}
14512   ins_pipe(pipe_slow);
14513 %}
14514 
14515 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14516   match(Set cr (CmpF src1 (LoadF src2)));
14517 
14518   ins_cost(100);
14519   format %{ "evucomxss $src1, $src2" %}
14520   ins_encode %{
14521     __ evucomxss($src1$$XMMRegister, $src2$$Address);
14522   %}
14523   ins_pipe(pipe_slow);
14524 %}
14525 
14526 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14527   match(Set cr (CmpF src con));
14528 
14529   ins_cost(100);
14530   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14531   ins_encode %{
14532     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14533   %}
14534   ins_pipe(pipe_slow);
14535 %}
14536 
14537 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14538   match(Set cr (CmpF src con));
14539 
14540   ins_cost(100);
14541   format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14542   ins_encode %{
14543     __ evucomxss($src$$XMMRegister, $constantaddress($con));
14544   %}
14545   ins_pipe(pipe_slow);
14546 %}
14547 
14548 // Really expensive, avoid
14549 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14550 %{
14551   match(Set cr (CmpD src1 src2));
14552 
14553   ins_cost(500);
14554   format %{ "ucomisd $src1, $src2\n\t"
14555             "jnp,s   exit\n\t"
14556             "pushfq\t# saw NaN, set CF\n\t"
14557             "andq    [rsp], #0xffffff2b\n\t"
14558             "popfq\n"
14559     "exit:" %}
14560   ins_encode %{
14561     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14562     emit_cmpfp_fixup(masm);
14563   %}
14564   ins_pipe(pipe_slow);
14565 %}
14566 
14567 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14568   match(Set cr (CmpD src1 src2));
14569 
14570   ins_cost(100);
14571   format %{ "ucomisd $src1, $src2 test" %}
14572   ins_encode %{
14573     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14574   %}
14575   ins_pipe(pipe_slow);
14576 %}
14577 
14578 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14579   match(Set cr (CmpD src1 src2));
14580 
14581   ins_cost(100);
14582   format %{ "evucomxsd $src1, $src2 test" %}
14583   ins_encode %{
14584     __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14585   %}
14586   ins_pipe(pipe_slow);
14587 %}
14588 
14589 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14590   match(Set cr (CmpD src1 (LoadD src2)));
14591 
14592   ins_cost(100);
14593   format %{ "ucomisd $src1, $src2" %}
14594   ins_encode %{
14595     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14596   %}
14597   ins_pipe(pipe_slow);
14598 %}
14599 
14600 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14601   match(Set cr (CmpD src1 (LoadD src2)));
14602 
14603   ins_cost(100);
14604   format %{ "evucomxsd $src1, $src2" %}
14605   ins_encode %{
14606     __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14607   %}
14608   ins_pipe(pipe_slow);
14609 %}
14610 
14611 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14612   match(Set cr (CmpD src con));
14613   ins_cost(100);
14614   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14615   ins_encode %{
14616     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14617   %}
14618   ins_pipe(pipe_slow);
14619 %}
14620 
14621 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14622   match(Set cr (CmpD src con));
14623 
14624   ins_cost(100);
14625   format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14626   ins_encode %{
14627     __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14628   %}
14629   ins_pipe(pipe_slow);
14630 %}
14631 
14632 // Compare into -1,0,1
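      // CmpF3/CmpD3 produce -1, 0 or 1. emit_cmpfp3() maps both the unordered (NaN) case
      // and "below" to -1, "equal" to 0, and "above" to 1, i.e. fcmpl-style results.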
14633 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14634 %{
14635   match(Set dst (CmpF3 src1 src2));
14636   effect(KILL cr);
14637 
14638   ins_cost(275);
14639   format %{ "ucomiss $src1, $src2\n\t"
14640             "movl    $dst, #-1\n\t"
14641             "jp,s    done\n\t"
14642             "jb,s    done\n\t"
14643             "setne   $dst\n\t"
14644             "movzbl  $dst, $dst\n"
14645     "done:" %}
14646   ins_encode %{
14647     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14648     emit_cmpfp3(masm, $dst$$Register);
14649   %}
14650   ins_pipe(pipe_slow);
14651 %}
14652 
14653 // Compare into -1,0,1
14654 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14655 %{
14656   match(Set dst (CmpF3 src1 (LoadF src2)));
14657   effect(KILL cr);
14658 
14659   ins_cost(275);
14660   format %{ "ucomiss $src1, $src2\n\t"
14661             "movl    $dst, #-1\n\t"
14662             "jp,s    done\n\t"
14663             "jb,s    done\n\t"
14664             "setne   $dst\n\t"
14665             "movzbl  $dst, $dst\n"
14666     "done:" %}
14667   ins_encode %{
14668     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14669     emit_cmpfp3(masm, $dst$$Register);
14670   %}
14671   ins_pipe(pipe_slow);
14672 %}
14673 
14674 // Compare into -1,0,1
14675 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14676   match(Set dst (CmpF3 src con));
14677   effect(KILL cr);
14678 
14679   ins_cost(275);
14680   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14681             "movl    $dst, #-1\n\t"
14682             "jp,s    done\n\t"
14683             "jb,s    done\n\t"
14684             "setne   $dst\n\t"
14685             "movzbl  $dst, $dst\n"
14686     "done:" %}
14687   ins_encode %{
14688     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14689     emit_cmpfp3(masm, $dst$$Register);
14690   %}
14691   ins_pipe(pipe_slow);
14692 %}
14693 
14694 // Compare into -1,0,1
14695 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14696 %{
14697   match(Set dst (CmpD3 src1 src2));
14698   effect(KILL cr);
14699 
14700   ins_cost(275);
14701   format %{ "ucomisd $src1, $src2\n\t"
14702             "movl    $dst, #-1\n\t"
14703             "jp,s    done\n\t"
14704             "jb,s    done\n\t"
14705             "setne   $dst\n\t"
14706             "movzbl  $dst, $dst\n"
14707     "done:" %}
14708   ins_encode %{
14709     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14710     emit_cmpfp3(masm, $dst$$Register);
14711   %}
14712   ins_pipe(pipe_slow);
14713 %}
14714 
14715 // Compare into -1,0,1
14716 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14717 %{
14718   match(Set dst (CmpD3 src1 (LoadD src2)));
14719   effect(KILL cr);
14720 
14721   ins_cost(275);
14722   format %{ "ucomisd $src1, $src2\n\t"
14723             "movl    $dst, #-1\n\t"
14724             "jp,s    done\n\t"
14725             "jb,s    done\n\t"
14726             "setne   $dst\n\t"
14727             "movzbl  $dst, $dst\n"
14728     "done:" %}
14729   ins_encode %{
14730     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14731     emit_cmpfp3(masm, $dst$$Register);
14732   %}
14733   ins_pipe(pipe_slow);
14734 %}
14735 
14736 // Compare into -1,0,1
14737 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14738   match(Set dst (CmpD3 src con));
14739   effect(KILL cr);
14740 
14741   ins_cost(275);
14742   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14743             "movl    $dst, #-1\n\t"
14744             "jp,s    done\n\t"
14745             "jb,s    done\n\t"
14746             "setne   $dst\n\t"
14747             "movzbl  $dst, $dst\n"
14748     "done:" %}
14749   ins_encode %{
14750     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14751     emit_cmpfp3(masm, $dst$$Register);
14752   %}
14753   ins_pipe(pipe_slow);
14754 %}
14755 
14756 //----------Arithmetic Conversion Instructions---------------------------------
14757 
14758 instruct convF2D_reg_reg(regD dst, regF src)
14759 %{
14760   match(Set dst (ConvF2D src));
14761 
14762   format %{ "cvtss2sd $dst, $src" %}
14763   ins_encode %{
14764     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14765   %}
14766   ins_pipe(pipe_slow); // XXX
14767 %}
14768 
14769 instruct convF2D_reg_mem(regD dst, memory src)
14770 %{
14771   predicate(UseAVX == 0);
14772   match(Set dst (ConvF2D (LoadF src)));
14773 
14774   format %{ "cvtss2sd $dst, $src" %}
14775   ins_encode %{
14776     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14777   %}
14778   ins_pipe(pipe_slow); // XXX
14779 %}
14780 
14781 instruct convD2F_reg_reg(regF dst, regD src)
14782 %{
14783   match(Set dst (ConvD2F src));
14784 
14785   format %{ "cvtsd2ss $dst, $src" %}
14786   ins_encode %{
14787     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14788   %}
14789   ins_pipe(pipe_slow); // XXX
14790 %}
14791 
14792 instruct convD2F_reg_mem(regF dst, memory src)
14793 %{
14794   predicate(UseAVX == 0);
14795   match(Set dst (ConvD2F (LoadD src)));
14796 
14797   format %{ "cvtsd2ss $dst, $src" %}
14798   ins_encode %{
14799     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14800   %}
14801   ins_pipe(pipe_slow); // XXX
14802 %}
14803 
14804 // XXX do mem variants
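      // Without AVX10.2, convertF2I() emits a truncating cvtt conversion plus fixup code,
      // because the hardware returns the integer-indefinite value for NaN and out-of-range
      // inputs while Java requires 0 for NaN and saturation to MIN/MAX. The AVX10.2 forms
      // below produce the Java result directly, so they need neither the fixup nor KILL cr.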
14805 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14806 %{
14807   predicate(!VM_Version::supports_avx10_2());
14808   match(Set dst (ConvF2I src));
14809   effect(KILL cr);
14810   format %{ "convert_f2i $dst, $src" %}
14811   ins_encode %{
14812     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14813   %}
14814   ins_pipe(pipe_slow);
14815 %}
14816 
14817 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14818 %{
14819   predicate(VM_Version::supports_avx10_2());
14820   match(Set dst (ConvF2I src));
14821   format %{ "evcvttss2sisl $dst, $src" %}
14822   ins_encode %{
14823     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14824   %}
14825   ins_pipe(pipe_slow);
14826 %}
14827 
14828 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14829 %{
14830   predicate(VM_Version::supports_avx10_2());
14831   match(Set dst (ConvF2I (LoadF src)));
14832   format %{ "evcvttss2sisl $dst, $src" %}
14833   ins_encode %{
14834     __ evcvttss2sisl($dst$$Register, $src$$Address);
14835   %}
14836   ins_pipe(pipe_slow);
14837 %}
14838 
14839 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14840 %{
14841   predicate(!VM_Version::supports_avx10_2());
14842   match(Set dst (ConvF2L src));
14843   effect(KILL cr);
14844   format %{ "convert_f2l $dst, $src"%}
14845   ins_encode %{
14846     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14847   %}
14848   ins_pipe(pipe_slow);
14849 %}
14850 
14851 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14852 %{
14853   predicate(VM_Version::supports_avx10_2());
14854   match(Set dst (ConvF2L src));
14855   format %{ "evcvttss2sisq $dst, $src" %}
14856   ins_encode %{
14857     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14858   %}
14859   ins_pipe(pipe_slow);
14860 %}
14861 
14862 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14863 %{
14864   predicate(VM_Version::supports_avx10_2());
14865   match(Set dst (ConvF2L (LoadF src)));
14866   format %{ "evcvttss2sisq $dst, $src" %}
14867   ins_encode %{
14868     __ evcvttss2sisq($dst$$Register, $src$$Address);
14869   %}
14870   ins_pipe(pipe_slow);
14871 %}
14872 
14873 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14874 %{
14875   predicate(!VM_Version::supports_avx10_2());
14876   match(Set dst (ConvD2I src));
14877   effect(KILL cr);
14878   format %{ "convert_d2i $dst, $src"%}
14879   ins_encode %{
14880     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14881   %}
14882   ins_pipe(pipe_slow);
14883 %}
14884 
14885 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14886 %{
14887   predicate(VM_Version::supports_avx10_2());
14888   match(Set dst (ConvD2I src));
14889   format %{ "evcvttsd2sisl $dst, $src" %}
14890   ins_encode %{
14891     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14892   %}
14893   ins_pipe(pipe_slow);
14894 %}
14895 
14896 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14897 %{
14898   predicate(VM_Version::supports_avx10_2());
14899   match(Set dst (ConvD2I (LoadD src)));
14900   format %{ "evcvttsd2sisl $dst, $src" %}
14901   ins_encode %{
14902     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14903   %}
14904   ins_pipe(pipe_slow);
14905 %}
14906 
14907 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14908 %{
14909   predicate(!VM_Version::supports_avx10_2());
14910   match(Set dst (ConvD2L src));
14911   effect(KILL cr);
14912   format %{ "convert_d2l $dst, $src"%}
14913   ins_encode %{
14914     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14915   %}
14916   ins_pipe(pipe_slow);
14917 %}
14918 
14919 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14920 %{
14921   predicate(VM_Version::supports_avx10_2());
14922   match(Set dst (ConvD2L src));
14923   format %{ "evcvttsd2sisq $dst, $src" %}
14924   ins_encode %{
14925     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14926   %}
14927   ins_pipe(pipe_slow);
14928 %}
14929 
14930 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14931 %{
14932   predicate(VM_Version::supports_avx10_2());
14933   match(Set dst (ConvD2L (LoadD src)));
14934   format %{ "evcvttsd2sisq $dst, $src" %}
14935   ins_encode %{
14936     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14937   %}
14938   ins_pipe(pipe_slow);
14939 %}
14940 
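      // RoundD/RoundF implement Math.round(double)/Math.round(float); the work is done in
      // the round_double/round_float macro-assembler helpers, which use rtmp and rcx as
      // temporaries (hence the TEMP effects).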
14941 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14942 %{
14943   match(Set dst (RoundD src));
14944   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14945   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14946   ins_encode %{
14947     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14948   %}
14949   ins_pipe(pipe_slow);
14950 %}
14951 
14952 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14953 %{
14954   match(Set dst (RoundF src));
14955   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14956   format %{ "round_float $dst,$src" %}
14957   ins_encode %{
14958     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14959   %}
14960   ins_pipe(pipe_slow);
14961 %}
14962 
14963 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14964 %{
14965   predicate(!UseXmmI2F);
14966   match(Set dst (ConvI2F src));
14967 
14968   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14969   ins_encode %{
14970     if (UseAVX > 0) {
14971       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14972     }
14973     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14974   %}
14975   ins_pipe(pipe_slow); // XXX
14976 %}
14977 
14978 instruct convI2F_reg_mem(regF dst, memory src)
14979 %{
14980   predicate(UseAVX == 0);
14981   match(Set dst (ConvI2F (LoadI src)));
14982 
14983   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14984   ins_encode %{
14985     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14986   %}
14987   ins_pipe(pipe_slow); // XXX
14988 %}
14989 
14990 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14991 %{
14992   predicate(!UseXmmI2D);
14993   match(Set dst (ConvI2D src));
14994 
14995   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14996   ins_encode %{
14997     if (UseAVX > 0) {
14998       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14999     }
15000     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
15001   %}
15002   ins_pipe(pipe_slow); // XXX
15003 %}
15004 
15005 instruct convI2D_reg_mem(regD dst, memory src)
15006 %{
15007   predicate(UseAVX == 0);
15008   match(Set dst (ConvI2D (LoadI src)));
15009 
15010   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
15011   ins_encode %{
15012     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
15013   %}
15014   ins_pipe(pipe_slow); // XXX
15015 %}
15016 
15017 instruct convXI2F_reg(regF dst, rRegI src)
15018 %{
15019   predicate(UseXmmI2F);
15020   match(Set dst (ConvI2F src));
15021 
15022   format %{ "movdl $dst, $src\n\t"
15023             "cvtdq2psl $dst, $dst\t# i2f" %}
15024   ins_encode %{
15025     __ movdl($dst$$XMMRegister, $src$$Register);
15026     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
15027   %}
15028   ins_pipe(pipe_slow); // XXX
15029 %}
15030 
15031 instruct convXI2D_reg(regD dst, rRegI src)
15032 %{
15033   predicate(UseXmmI2D);
15034   match(Set dst (ConvI2D src));
15035 
15036   format %{ "movdl $dst, $src\n\t"
15037             "cvtdq2pdl $dst, $dst\t# i2d" %}
15038   ins_encode %{
15039     __ movdl($dst$$XMMRegister, $src$$Register);
15040     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
15041   %}
15042   ins_pipe(pipe_slow); // XXX
15043 %}
15044 
15045 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
15046 %{
15047   match(Set dst (ConvL2F src));
15048 
15049   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15050   ins_encode %{
15051     if (UseAVX > 0) {
15052       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15053     }
15054     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
15055   %}
15056   ins_pipe(pipe_slow); // XXX
15057 %}
15058 
15059 instruct convL2F_reg_mem(regF dst, memory src)
15060 %{
15061   predicate(UseAVX == 0);
15062   match(Set dst (ConvL2F (LoadL src)));
15063 
15064   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15065   ins_encode %{
15066     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
15067   %}
15068   ins_pipe(pipe_slow); // XXX
15069 %}
15070 
15071 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
15072 %{
15073   match(Set dst (ConvL2D src));
15074 
15075   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15076   ins_encode %{
15077     if (UseAVX > 0) {
15078       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15079     }
15080     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
15081   %}
15082   ins_pipe(pipe_slow); // XXX
15083 %}
15084 
15085 instruct convL2D_reg_mem(regD dst, memory src)
15086 %{
15087   predicate(UseAVX == 0);
15088   match(Set dst (ConvL2D (LoadL src)));
15089 
15090   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15091   ins_encode %{
15092     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15093   %}
15094   ins_pipe(pipe_slow); // XXX
15095 %}
15096 
15097 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15098 %{
15099   match(Set dst (ConvI2L src));
15100 
15101   ins_cost(125);
15102   format %{ "movslq  $dst, $src\t# i2l" %}
15103   ins_encode %{
15104     __ movslq($dst$$Register, $src$$Register);
15105   %}
15106   ins_pipe(ialu_reg_reg);
15107 %}
15108 
15109 // Zero-extend convert int to long
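      // The immL_32bits mask turns the AndL into a plain zero-extension: a 32-bit movl
      // already clears the upper 32 bits, so no explicit AND is emitted, and the move is
      // elided entirely when dst and src are the same register.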
15110 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15111 %{
15112   match(Set dst (AndL (ConvI2L src) mask));
15113 
15114   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15115   ins_encode %{
15116     if ($dst$$reg != $src$$reg) {
15117       __ movl($dst$$Register, $src$$Register);
15118     }
15119   %}
15120   ins_pipe(ialu_reg_reg);
15121 %}
15122 
15123 // Zero-extend convert int to long
15124 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15125 %{
15126   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15127 
15128   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
15129   ins_encode %{
15130     __ movl($dst$$Register, $src$$Address);
15131   %}
15132   ins_pipe(ialu_reg_mem);
15133 %}
15134 
15135 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15136 %{
15137   match(Set dst (AndL src mask));
15138 
15139   format %{ "movl    $dst, $src\t# zero-extend long" %}
15140   ins_encode %{
15141     __ movl($dst$$Register, $src$$Register);
15142   %}
15143   ins_pipe(ialu_reg_reg);
15144 %}
15145 
15146 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15147 %{
15148   match(Set dst (ConvL2I src));
15149 
15150   format %{ "movl    $dst, $src\t# l2i" %}
15151   ins_encode %{
15152     __ movl($dst$$Register, $src$$Register);
15153   %}
15154   ins_pipe(ialu_reg_reg);
15155 %}
15156 
15157 
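      // MoveF2I/MoveI2F and MoveD2L/MoveL2D are raw bit-pattern moves between the integer
      // and FP register files (or through a stack slot), with no value conversion, as used
      // for Float.floatToRawIntBits, Double.doubleToRawLongBits and their inverses.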
15158 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15159   match(Set dst (MoveF2I src));
15160   effect(DEF dst, USE src);
15161 
15162   ins_cost(125);
15163   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15164   ins_encode %{
15165     __ movl($dst$$Register, Address(rsp, $src$$disp));
15166   %}
15167   ins_pipe(ialu_reg_mem);
15168 %}
15169 
15170 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15171   match(Set dst (MoveI2F src));
15172   effect(DEF dst, USE src);
15173 
15174   ins_cost(125);
15175   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15176   ins_encode %{
15177     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15178   %}
15179   ins_pipe(pipe_slow);
15180 %}
15181 
15182 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15183   match(Set dst (MoveD2L src));
15184   effect(DEF dst, USE src);
15185 
15186   ins_cost(125);
15187   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15188   ins_encode %{
15189     __ movq($dst$$Register, Address(rsp, $src$$disp));
15190   %}
15191   ins_pipe(ialu_reg_mem);
15192 %}
15193 
15194 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15195   predicate(!UseXmmLoadAndClearUpper);
15196   match(Set dst (MoveL2D src));
15197   effect(DEF dst, USE src);
15198 
15199   ins_cost(125);
15200   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15201   ins_encode %{
15202     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15203   %}
15204   ins_pipe(pipe_slow);
15205 %}
15206 
15207 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15208   predicate(UseXmmLoadAndClearUpper);
15209   match(Set dst (MoveL2D src));
15210   effect(DEF dst, USE src);
15211 
15212   ins_cost(125);
15213   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15214   ins_encode %{
15215     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15216   %}
15217   ins_pipe(pipe_slow);
15218 %}
15219 
15220 
15221 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15222   match(Set dst (MoveF2I src));
15223   effect(DEF dst, USE src);
15224 
15225   ins_cost(95); // XXX
15226   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15227   ins_encode %{
15228     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15229   %}
15230   ins_pipe(pipe_slow);
15231 %}
15232 
15233 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15234   match(Set dst (MoveI2F src));
15235   effect(DEF dst, USE src);
15236 
15237   ins_cost(100);
15238   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15239   ins_encode %{
15240     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15241   %}
15242   ins_pipe( ialu_mem_reg );
15243 %}
15244 
15245 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15246   match(Set dst (MoveD2L src));
15247   effect(DEF dst, USE src);
15248 
15249   ins_cost(95); // XXX
15250   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
15251   ins_encode %{
15252     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15253   %}
15254   ins_pipe(pipe_slow);
15255 %}
15256 
15257 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15258   match(Set dst (MoveL2D src));
15259   effect(DEF dst, USE src);
15260 
15261   ins_cost(100);
15262   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15263   ins_encode %{
15264     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15265   %}
15266   ins_pipe(ialu_mem_reg);
15267 %}
15268 
15269 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15270   match(Set dst (MoveF2I src));
15271   effect(DEF dst, USE src);
15272   ins_cost(85);
15273   format %{ "movd    $dst,$src\t# MoveF2I" %}
15274   ins_encode %{
15275     __ movdl($dst$$Register, $src$$XMMRegister);
15276   %}
15277   ins_pipe( pipe_slow );
15278 %}
15279 
15280 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15281   match(Set dst (MoveD2L src));
15282   effect(DEF dst, USE src);
15283   ins_cost(85);
15284   format %{ "movd    $dst,$src\t# MoveD2L" %}
15285   ins_encode %{
15286     __ movdq($dst$$Register, $src$$XMMRegister);
15287   %}
15288   ins_pipe( pipe_slow );
15289 %}
15290 
15291 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15292   match(Set dst (MoveI2F src));
15293   effect(DEF dst, USE src);
15294   ins_cost(100);
15295   format %{ "movd    $dst,$src\t# MoveI2F" %}
15296   ins_encode %{
15297     __ movdl($dst$$XMMRegister, $src$$Register);
15298   %}
15299   ins_pipe( pipe_slow );
15300 %}
15301 
15302 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15303   match(Set dst (MoveL2D src));
15304   effect(DEF dst, USE src);
15305   ins_cost(100);
15306   format %{ "movd    $dst,$src\t# MoveL2D" %}
15307   ins_encode %{
15308      __ movdq($dst$$XMMRegister, $src$$Register);
15309   %}
15310   ins_pipe( pipe_slow );
15311 %}
15312 
15313 
15314 // Fast clearing of an array
15315 // Small non-constant length ClearArray for non-AVX512 targets.
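      // clear_mem() fills cnt words starting at base with val and picks its strategy at
      // runtime (rep stosb/stosq or XMM/YMM stores) based on UseFastStosb and
      // UseXMMForObjInit, as the format template below spells out.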
15316 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15317                   Universe dummy, rFlagsReg cr)
15318 %{
15319   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15320   match(Set dummy (ClearArray (Binary cnt base) val));
15321   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15322 
15323   format %{ $$template
15324     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15325     $$emit$$"jg      LARGE\n\t"
15326     $$emit$$"dec     rcx\n\t"
15327     $$emit$$"js      DONE\t# Zero length\n\t"
15328     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15329     $$emit$$"dec     rcx\n\t"
15330     $$emit$$"jge     LOOP\n\t"
15331     $$emit$$"jmp     DONE\n\t"
15332     $$emit$$"# LARGE:\n\t"
15333     if (UseFastStosb) {
15334        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15335        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15336     } else if (UseXMMForObjInit) {
15337        $$emit$$"movdq   $tmp, $val\n\t"
15338        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15339        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15340        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15341        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15342        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15343        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15344        $$emit$$"add     0x40,rax\n\t"
15345        $$emit$$"# L_zero_64_bytes:\n\t"
15346        $$emit$$"sub     0x8,rcx\n\t"
15347        $$emit$$"jge     L_loop\n\t"
15348        $$emit$$"add     0x4,rcx\n\t"
15349        $$emit$$"jl      L_tail\n\t"
15350        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15351        $$emit$$"add     0x20,rax\n\t"
15352        $$emit$$"sub     0x4,rcx\n\t"
15353        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15354        $$emit$$"add     0x4,rcx\n\t"
15355        $$emit$$"jle     L_end\n\t"
15356        $$emit$$"dec     rcx\n\t"
15357        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15358        $$emit$$"vmovq   xmm0,(rax)\n\t"
15359        $$emit$$"add     0x8,rax\n\t"
15360        $$emit$$"dec     rcx\n\t"
15361        $$emit$$"jge     L_sloop\n\t"
15362        $$emit$$"# L_end:\n\t"
15363     } else {
15364        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15365     }
15366     $$emit$$"# DONE"
15367   %}
15368   ins_encode %{
15369     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15370                  $tmp$$XMMRegister, false, false);
15371   %}
15372   ins_pipe(pipe_slow);
15373 %}
15374 
15375 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15376                             Universe dummy, rFlagsReg cr)
15377 %{
15378   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15379   match(Set dummy (ClearArray (Binary cnt base) val));
15380   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15381 
15382   format %{ $$template
15383     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15384     $$emit$$"jg      LARGE\n\t"
15385     $$emit$$"dec     rcx\n\t"
15386     $$emit$$"js      DONE\t# Zero length\n\t"
15387     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15388     $$emit$$"dec     rcx\n\t"
15389     $$emit$$"jge     LOOP\n\t"
15390     $$emit$$"jmp     DONE\n\t"
15391     $$emit$$"# LARGE:\n\t"
15392     if (UseXMMForObjInit) {
15393        $$emit$$"movdq   $tmp, $val\n\t"
15394        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15395        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15396        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15397        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15398        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15399        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15400        $$emit$$"add     0x40,rax\n\t"
15401        $$emit$$"# L_zero_64_bytes:\n\t"
15402        $$emit$$"sub     0x8,rcx\n\t"
15403        $$emit$$"jge     L_loop\n\t"
15404        $$emit$$"add     0x4,rcx\n\t"
15405        $$emit$$"jl      L_tail\n\t"
15406        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15407        $$emit$$"add     0x20,rax\n\t"
15408        $$emit$$"sub     0x4,rcx\n\t"
15409        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15410        $$emit$$"add     0x4,rcx\n\t"
15411        $$emit$$"jle     L_end\n\t"
15412        $$emit$$"dec     rcx\n\t"
15413        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15414        $$emit$$"vmovq   xmm0,(rax)\n\t"
15415        $$emit$$"add     0x8,rax\n\t"
15416        $$emit$$"dec     rcx\n\t"
15417        $$emit$$"jge     L_sloop\n\t"
15418        $$emit$$"# L_end:\n\t"
15419     } else {
15420        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15421     }
15422     $$emit$$"# DONE"
15423   %}
15424   ins_encode %{
15425     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15426                  $tmp$$XMMRegister, false, true);
15427   %}
15428   ins_pipe(pipe_slow);
15429 %}
15430 
15431 // Small non-constant length ClearArray for AVX512 targets.
15432 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15433                        Universe dummy, rFlagsReg cr)
15434 %{
15435   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15436   match(Set dummy (ClearArray (Binary cnt base) val));
15437   ins_cost(125);
15438   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15439 
15440   format %{ $$template
15441     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15442     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15443     $$emit$$"jg      LARGE\n\t"
15444     $$emit$$"dec     rcx\n\t"
15445     $$emit$$"js      DONE\t# Zero length\n\t"
15446     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15447     $$emit$$"dec     rcx\n\t"
15448     $$emit$$"jge     LOOP\n\t"
15449     $$emit$$"jmp     DONE\n\t"
15450     $$emit$$"# LARGE:\n\t"
15451     if (UseFastStosb) {
15452        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15453        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15454     } else if (UseXMMForObjInit) {
15455        $$emit$$"mov     rdi,rax\n\t"
15456        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15457        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15458        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15459        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15460        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15461        $$emit$$"add     0x40,rax\n\t"
15462        $$emit$$"# L_zero_64_bytes:\n\t"
15463        $$emit$$"sub     0x8,rcx\n\t"
15464        $$emit$$"jge     L_loop\n\t"
15465        $$emit$$"add     0x4,rcx\n\t"
15466        $$emit$$"jl      L_tail\n\t"
15467        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15468        $$emit$$"add     0x20,rax\n\t"
15469        $$emit$$"sub     0x4,rcx\n\t"
15470        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15471        $$emit$$"add     0x4,rcx\n\t"
15472        $$emit$$"jle     L_end\n\t"
15473        $$emit$$"dec     rcx\n\t"
15474        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15475        $$emit$$"vmovq   xmm0,(rax)\n\t"
15476        $$emit$$"add     0x8,rax\n\t"
15477        $$emit$$"dec     rcx\n\t"
15478        $$emit$$"jge     L_sloop\n\t"
15479        $$emit$$"# L_end:\n\t"
15480     } else {
15481        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15482     }
15483     $$emit$$"# DONE"
15484   %}
15485   ins_encode %{
15486     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15487                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15488   %}
15489   ins_pipe(pipe_slow);
15490 %}
15491 
15492 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15493                                  Universe dummy, rFlagsReg cr)
15494 %{
15495   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15496   match(Set dummy (ClearArray (Binary cnt base) val));
15497   ins_cost(125);
15498   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15499 
15500   format %{ $$template
15501     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15502     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15503     $$emit$$"jg      LARGE\n\t"
15504     $$emit$$"dec     rcx\n\t"
15505     $$emit$$"js      DONE\t# Zero length\n\t"
15506     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15507     $$emit$$"dec     rcx\n\t"
15508     $$emit$$"jge     LOOP\n\t"
15509     $$emit$$"jmp     DONE\n\t"
15510     $$emit$$"# LARGE:\n\t"
15511     if (UseFastStosb) {
15512        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15513        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15514     } else if (UseXMMForObjInit) {
15515        $$emit$$"mov     rdi,rax\n\t"
15516        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15517        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15518        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15519        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15520        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15521        $$emit$$"add     0x40,rax\n\t"
15522        $$emit$$"# L_zero_64_bytes:\n\t"
15523        $$emit$$"sub     0x8,rcx\n\t"
15524        $$emit$$"jge     L_loop\n\t"
15525        $$emit$$"add     0x4,rcx\n\t"
15526        $$emit$$"jl      L_tail\n\t"
15527        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15528        $$emit$$"add     0x20,rax\n\t"
15529        $$emit$$"sub     0x4,rcx\n\t"
15530        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15531        $$emit$$"add     0x4,rcx\n\t"
15532        $$emit$$"jle     L_end\n\t"
15533        $$emit$$"dec     rcx\n\t"
15534        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15535        $$emit$$"vmovq   xmm0,(rax)\n\t"
15536        $$emit$$"add     0x8,rax\n\t"
15537        $$emit$$"dec     rcx\n\t"
15538        $$emit$$"jge     L_sloop\n\t"
15539        $$emit$$"# L_end:\n\t"
15540     } else {
15541        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15542     }
15543     $$emit$$"# DONE"
15544   %}
15545   ins_encode %{
15546     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15547                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15548   %}
15549   ins_pipe(pipe_slow);
15550 %}
15551 
15552 // Large non-constant length ClearArray for non-AVX512 targets.
15553 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15554                         Universe dummy, rFlagsReg cr)
15555 %{
15556   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15557   match(Set dummy (ClearArray (Binary cnt base) val));
15558   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15559 
15560   format %{ $$template
15561     if (UseFastStosb) {
15562        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15563        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15564     } else if (UseXMMForObjInit) {
15565        $$emit$$"movdq   $tmp, $val\n\t"
15566        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15567        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15568        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15569        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15570        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15571        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15572        $$emit$$"add     0x40,rax\n\t"
15573        $$emit$$"# L_zero_64_bytes:\n\t"
15574        $$emit$$"sub     0x8,rcx\n\t"
15575        $$emit$$"jge     L_loop\n\t"
15576        $$emit$$"add     0x4,rcx\n\t"
15577        $$emit$$"jl      L_tail\n\t"
15578        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15579        $$emit$$"add     0x20,rax\n\t"
15580        $$emit$$"sub     0x4,rcx\n\t"
15581        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15582        $$emit$$"add     0x4,rcx\n\t"
15583        $$emit$$"jle     L_end\n\t"
15584        $$emit$$"dec     rcx\n\t"
15585        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15586        $$emit$$"vmovq   xmm0,(rax)\n\t"
15587        $$emit$$"add     0x8,rax\n\t"
15588        $$emit$$"dec     rcx\n\t"
15589        $$emit$$"jge     L_sloop\n\t"
15590        $$emit$$"# L_end:\n\t"
15591     } else {
15592        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15593     }
15594   %}
15595   ins_encode %{
15596     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15597                  $tmp$$XMMRegister, true, false);
15598   %}
15599   ins_pipe(pipe_slow);
15600 %}
15601 
15602 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15603                                   Universe dummy, rFlagsReg cr)
15604 %{
15605   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15606   match(Set dummy (ClearArray (Binary cnt base) val));
15607   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15608 
15609   format %{ $$template
15610     if (UseXMMForObjInit) {
15611        $$emit$$"movdq   $tmp, $val\n\t"
15612        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15613        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15614        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15615        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15616        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15617        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15618        $$emit$$"add     0x40,rax\n\t"
15619        $$emit$$"# L_zero_64_bytes:\n\t"
15620        $$emit$$"sub     0x8,rcx\n\t"
15621        $$emit$$"jge     L_loop\n\t"
15622        $$emit$$"add     0x4,rcx\n\t"
15623        $$emit$$"jl      L_tail\n\t"
15624        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15625        $$emit$$"add     0x20,rax\n\t"
15626        $$emit$$"sub     0x4,rcx\n\t"
15627        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15628        $$emit$$"add     0x4,rcx\n\t"
15629        $$emit$$"jle     L_end\n\t"
15630        $$emit$$"dec     rcx\n\t"
15631        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15632        $$emit$$"vmovq   xmm0,(rax)\n\t"
15633        $$emit$$"add     0x8,rax\n\t"
15634        $$emit$$"dec     rcx\n\t"
15635        $$emit$$"jge     L_sloop\n\t"
15636        $$emit$$"# L_end:\n\t"
15637     } else {
15638        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15639     }
15640   %}
15641   ins_encode %{
15642     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15643                  $tmp$$XMMRegister, true, true);
15644   %}
15645   ins_pipe(pipe_slow);
15646 %}
15647 
15648 // Large non-constant length ClearArray for AVX512 targets.
15649 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15650                              Universe dummy, rFlagsReg cr)
15651 %{
15652   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15653   match(Set dummy (ClearArray (Binary cnt base) val));
15654   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15655 
15656   format %{ $$template
15657     if (UseFastStosb) {
15658        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15659        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15660        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15661     } else if (UseXMMForObjInit) {
15662        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15663        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15664        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15665        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15666        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15667        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15668        $$emit$$"add     0x40,rax\n\t"
15669        $$emit$$"# L_zero_64_bytes:\n\t"
15670        $$emit$$"sub     0x8,rcx\n\t"
15671        $$emit$$"jge     L_loop\n\t"
15672        $$emit$$"add     0x4,rcx\n\t"
15673        $$emit$$"jl      L_tail\n\t"
15674        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15675        $$emit$$"add     0x20,rax\n\t"
15676        $$emit$$"sub     0x4,rcx\n\t"
15677        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15678        $$emit$$"add     0x4,rcx\n\t"
15679        $$emit$$"jle     L_end\n\t"
15680        $$emit$$"dec     rcx\n\t"
15681        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15682        $$emit$$"vmovq   xmm0,(rax)\n\t"
15683        $$emit$$"add     0x8,rax\n\t"
15684        $$emit$$"dec     rcx\n\t"
15685        $$emit$$"jge     L_sloop\n\t"
15686        $$emit$$"# L_end:\n\t"
15687     } else {
15688        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15689        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15690     }
15691   %}
15692   ins_encode %{
15693     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15694                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15695   %}
15696   ins_pipe(pipe_slow);
15697 %}
15698 
15699 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15700                                        Universe dummy, rFlagsReg cr)
15701 %{
15702   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15703   match(Set dummy (ClearArray (Binary cnt base) val));
15704   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15705 
15706   format %{ $$template
15707     if (UseFastStosb) {
15708        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15709        $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15710        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15711     } else if (UseXMMForObjInit) {
15712        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15713        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15714        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15715        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15716        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15717        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15718        $$emit$$"add     0x40,rax\n\t"
15719        $$emit$$"# L_zero_64_bytes:\n\t"
15720        $$emit$$"sub     0x8,rcx\n\t"
15721        $$emit$$"jge     L_loop\n\t"
15722        $$emit$$"add     0x4,rcx\n\t"
15723        $$emit$$"jl      L_tail\n\t"
15724        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15725        $$emit$$"add     0x20,rax\n\t"
15726        $$emit$$"sub     0x4,rcx\n\t"
15727        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15728        $$emit$$"add     0x4,rcx\n\t"
15729        $$emit$$"jle     L_end\n\t"
15730        $$emit$$"dec     rcx\n\t"
15731        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15732        $$emit$$"vmovq   xmm0,(rax)\n\t"
15733        $$emit$$"add     0x8,rax\n\t"
15734        $$emit$$"dec     rcx\n\t"
15735        $$emit$$"jge     L_sloop\n\t"
15736        $$emit$$"# L_end:\n\t"
15737     } else {
15738        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15739        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15740     }
15741   %}
15742   ins_encode %{
15743     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15744                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15745   %}
15746   ins_pipe(pipe_slow);
15747 %}
15748 
15749 // Small constant length ClearArray for AVX512 targets.
15750 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15751 %{
15752   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15753             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15754   match(Set dummy (ClearArray (Binary cnt base) val));
15755   ins_cost(100);
15756   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15757   format %{ "clear_mem_imm $base, $cnt\n\t" %}
15758   ins_encode %{
15759     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15760   %}
15761   ins_pipe(pipe_slow);
15762 %}
15763 
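      // String compare intrinsics.  The LL/UU/LU/UL suffixes name the encodings of
      // the two operands (L = Latin-1 byte data, U = UTF-16 char data).  The _evex
      // variants are selected when AVX-512 VL/BW is available and additionally pass
      // an opmask (kReg) temporary to the macro assembler.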
15764 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15765                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15766 %{
15767   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15768   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15769   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15770 
15771   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15772   ins_encode %{
15773     __ string_compare($str1$$Register, $str2$$Register,
15774                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15775                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15776   %}
15777   ins_pipe( pipe_slow );
15778 %}
15779 
15780 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15781                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15782 %{
15783   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15784   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15785   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15786 
15787   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15788   ins_encode %{
15789     __ string_compare($str1$$Register, $str2$$Register,
15790                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15791                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15792   %}
15793   ins_pipe( pipe_slow );
15794 %}
15795 
15796 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15797                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15798 %{
15799   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15800   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15801   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15802 
15803   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15804   ins_encode %{
15805     __ string_compare($str1$$Register, $str2$$Register,
15806                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15807                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15808   %}
15809   ins_pipe( pipe_slow );
15810 %}
15811 
15812 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15813                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15814 %{
15815   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15816   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15817   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15818 
15819   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15820   ins_encode %{
15821     __ string_compare($str1$$Register, $str2$$Register,
15822                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15823                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15824   %}
15825   ins_pipe( pipe_slow );
15826 %}
15827 
15828 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15829                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15830 %{
15831   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15832   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15833   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15834 
15835   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15836   ins_encode %{
15837     __ string_compare($str1$$Register, $str2$$Register,
15838                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15839                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15840   %}
15841   ins_pipe( pipe_slow );
15842 %}
15843 
15844 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15845                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15846 %{
15847   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15848   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15849   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15850 
15851   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15852   ins_encode %{
15853     __ string_compare($str1$$Register, $str2$$Register,
15854                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15855                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15856   %}
15857   ins_pipe( pipe_slow );
15858 %}
15859 
15860 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15861                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15862 %{
15863   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15864   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15865   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15866 
15867   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15868   ins_encode %{
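          // The operands are deliberately passed swapped (str2/cnt2 first, still
          // tagged StrIntrinsicNode::UL) so that the Latin-1 operand comes first,
          // as in the LU case above.  The _evex variant below does the same.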
15869     __ string_compare($str2$$Register, $str1$$Register,
15870                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15871                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15872   %}
15873   ins_pipe( pipe_slow );
15874 %}
15875 
15876 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15877                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15878 %{
15879   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15880   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15881   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15882 
15883   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15884   ins_encode %{
15885     __ string_compare($str2$$Register, $str1$$Register,
15886                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15887                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15888   %}
15889   ins_pipe( pipe_slow );
15890 %}
15891 
15892 // fast search of substring with known size.
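      // A constant needle of at least one full 16-byte SSE4.2 vector (>= 16 Latin-1
      // bytes or >= 8 UTF-16 chars, see the icnt2 checks below) is handled by
      // string_indexofC8 without staging the needle through the stack; shorter
      // constants fall back to the general string_indexof path.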
15893 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15894                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15895 %{
15896   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15897   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15898   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15899 
15900   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15901   ins_encode %{
15902     int icnt2 = (int)$int_cnt2$$constant;
15903     if (icnt2 >= 16) {
15904       // IndexOf for constant substrings with size >= 16 elements
15905       // which don't need to be loaded through stack.
15906       __ string_indexofC8($str1$$Register, $str2$$Register,
15907                           $cnt1$$Register, $cnt2$$Register,
15908                           icnt2, $result$$Register,
15909                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15910     } else {
15911       // Small strings are loaded through stack if they cross page boundary.
15912       __ string_indexof($str1$$Register, $str2$$Register,
15913                         $cnt1$$Register, $cnt2$$Register,
15914                         icnt2, $result$$Register,
15915                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15916     }
15917   %}
15918   ins_pipe( pipe_slow );
15919 %}
15920 
15921 // fast search of substring with known size.
15922 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15923                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15924 %{
15925   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15926   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15927   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15928 
15929   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15930   ins_encode %{
15931     int icnt2 = (int)$int_cnt2$$constant;
15932     if (icnt2 >= 8) {
15933       // IndexOf for constant substrings with size >= 8 elements
15934       // which don't need to be loaded through stack.
15935       __ string_indexofC8($str1$$Register, $str2$$Register,
15936                           $cnt1$$Register, $cnt2$$Register,
15937                           icnt2, $result$$Register,
15938                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15939     } else {
15940       // Small strings are loaded through stack if they cross page boundary.
15941       __ string_indexof($str1$$Register, $str2$$Register,
15942                         $cnt1$$Register, $cnt2$$Register,
15943                         icnt2, $result$$Register,
15944                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15945     }
15946   %}
15947   ins_pipe( pipe_slow );
15948 %}
15949 
15950 // fast search of substring with known size.
15951 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15952                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15953 %{
15954   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15955   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15956   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15957 
15958   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15959   ins_encode %{
15960     int icnt2 = (int)$int_cnt2$$constant;
15961     if (icnt2 >= 8) {
15962       // IndexOf for constant substrings with size >= 8 elements
15963       // which don't need to be loaded through stack.
15964       __ string_indexofC8($str1$$Register, $str2$$Register,
15965                           $cnt1$$Register, $cnt2$$Register,
15966                           icnt2, $result$$Register,
15967                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15968     } else {
15969       // Small strings are loaded through stack if they cross page boundary.
15970       __ string_indexof($str1$$Register, $str2$$Register,
15971                         $cnt1$$Register, $cnt2$$Register,
15972                         icnt2, $result$$Register,
15973                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15974     }
15975   %}
15976   ins_pipe( pipe_slow );
15977 %}
15978 
15979 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15980                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15981 %{
15982   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15983   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15984   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15985 
15986   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15987   ins_encode %{
15988     __ string_indexof($str1$$Register, $str2$$Register,
15989                       $cnt1$$Register, $cnt2$$Register,
15990                       (-1), $result$$Register,
15991                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15992   %}
15993   ins_pipe( pipe_slow );
15994 %}
15995 
15996 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15997                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15998 %{
15999   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
16000   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16001   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
16002 
16003   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
16004   ins_encode %{
16005     __ string_indexof($str1$$Register, $str2$$Register,
16006                       $cnt1$$Register, $cnt2$$Register,
16007                       (-1), $result$$Register,
16008                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
16009   %}
16010   ins_pipe( pipe_slow );
16011 %}
16012 
16013 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
16014                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
16015 %{
16016   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
16017   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16018   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
16019 
16020   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
16021   ins_encode %{
16022     __ string_indexof($str1$$Register, $str2$$Register,
16023                       $cnt1$$Register, $cnt2$$Register,
16024                       (-1), $result$$Register,
16025                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
16026   %}
16027   ins_pipe( pipe_slow );
16028 %}
16029 
16030 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16031                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16032 %{
16033   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
16034   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16035   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16036   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
16037   ins_encode %{
16038     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16039                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16040   %}
16041   ins_pipe( pipe_slow );
16042 %}
16043 
16044 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16045                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16046 %{
16047   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
16048   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16049   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16050   format %{ "StringLatin1 IndexOf byte[] $str1,$cnt1,$ch -> $result   // KILL all" %}
16051   ins_encode %{
16052     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16053                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16054   %}
16055   ins_pipe( pipe_slow );
16056 %}
16057 
16058 // fast string equals
16059 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16060                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16061 %{
16062   predicate(!VM_Version::supports_avx512vlbw());
16063   match(Set result (StrEquals (Binary str1 str2) cnt));
16064   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16065 
16066   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16067   ins_encode %{
16068     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16069                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16070                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16071   %}
16072   ins_pipe( pipe_slow );
16073 %}
16074 
16075 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16076                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
16077 %{
16078   predicate(VM_Version::supports_avx512vlbw());
16079   match(Set result (StrEquals (Binary str1 str2) cnt));
16080   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16081 
16082   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16083   ins_encode %{
16084     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16085                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16086                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16087   %}
16088   ins_pipe( pipe_slow );
16089 %}
16090 
16091 // fast array equals
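      // The boolean /* char */ argument of arrays_equals selects the element width:
      // false compares byte[] (LL), true compares char[] (UU).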
16092 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16093                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16094 %{
16095   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16096   match(Set result (AryEq ary1 ary2));
16097   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16098 
16099   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16100   ins_encode %{
16101     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16102                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16103                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16104   %}
16105   ins_pipe( pipe_slow );
16106 %}
16107 
16108 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16109                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16110 %{
16111   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16112   match(Set result (AryEq ary1 ary2));
16113   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16114 
16115   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16116   ins_encode %{
16117     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16118                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16119                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16120   %}
16121   ins_pipe( pipe_slow );
16122 %}
16123 
16124 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16125                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16126 %{
16127   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16128   match(Set result (AryEq ary1 ary2));
16129   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16130 
16131   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16132   ins_encode %{
16133     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16134                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16135                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
16136   %}
16137   ins_pipe( pipe_slow );
16138 %}
16139 
16140 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16141                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16142 %{
16143   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16144   match(Set result (AryEq ary1 ary2));
16145   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16146 
16147   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16148   ins_encode %{
16149     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16150                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16151                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16152   %}
16153   ins_pipe( pipe_slow );
16154 %}
16155 
16156 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16157                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16158                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16159                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16160                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16161 %{
16162   predicate(UseAVX >= 2);
16163   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16164   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16165          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16166          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16167          USE basic_type, KILL cr);
16168 
16169   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
16170   ins_encode %{
16171     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16172                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16173                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16174                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16175                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16176                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16177                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16178   %}
16179   ins_pipe( pipe_slow );
16180 %}
16181 
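      // Count the prefix of non-negative bytes in a byte[]; the String coders use
      // this to pick the Latin-1/ASCII fast path.  The exact result contract is
      // defined by the CountPositives intrinsic, not by this match rule.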
16182 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
16183                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16184 %{
16185   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16186   match(Set result (CountPositives ary1 len));
16187   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16188 
16189   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16190   ins_encode %{
16191     __ count_positives($ary1$$Register, $len$$Register,
16192                        $result$$Register, $tmp3$$Register,
16193                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16194   %}
16195   ins_pipe( pipe_slow );
16196 %}
16197 
16198 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
16199                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16200 %{
16201   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16202   match(Set result (CountPositives ary1 len));
16203   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16204 
16205   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16206   ins_encode %{
16207     __ count_positives($ary1$$Register, $len$$Register,
16208                        $result$$Register, $tmp3$$Register,
16209                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16210   %}
16211   ins_pipe( pipe_slow );
16212 %}
16213 
16214 // fast char[] to byte[] compression
16215 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16216                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16217   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16218   match(Set result (StrCompressedCopy src (Binary dst len)));
16219   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16220          USE_KILL len, KILL tmp5, KILL cr);
16221 
16222   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16223   ins_encode %{
16224     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16225                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16226                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16227                            knoreg, knoreg);
16228   %}
16229   ins_pipe( pipe_slow );
16230 %}
16231 
16232 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16233                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16234   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16235   match(Set result (StrCompressedCopy src (Binary dst len)));
16236   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16237          USE_KILL len, KILL tmp5, KILL cr);
16238 
16239   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
16240   ins_encode %{
16241     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16242                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16243                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16244                            $ktmp1$$KRegister, $ktmp2$$KRegister);
16245   %}
16246   ins_pipe( pipe_slow );
16247 %}
16248 // fast byte[] to char[] inflation
16249 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16250                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16251   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16252   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16253   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16254 
16255   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16256   ins_encode %{
16257     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16258                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16259   %}
16260   ins_pipe( pipe_slow );
16261 %}
16262 
16263 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16264                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16265   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16266   match(Set dummy (StrInflatedCopy src (Binary dst len)));
16267   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16268 
16269   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
16270   ins_encode %{
16271     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16272                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16273   %}
16274   ins_pipe( pipe_slow );
16275 %}
16276 
16277 // encode char[] to byte[] in ISO_8859_1
16278 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16279                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16280                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16281   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16282   match(Set result (EncodeISOArray src (Binary dst len)));
16283   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16284 
16285   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16286   ins_encode %{
16287     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16288                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16289                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16290   %}
16291   ins_pipe( pipe_slow );
16292 %}
16293 
16294 // encode char[] to byte[] in ASCII
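      // Same macro-assembler routine as the ISO-8859-1 case above; the trailing
      // boolean of encode_iso_array switches the out-of-range check from
      // "char > 0xFF" to "char > 0x7F".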
16295 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16296                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16297                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16298   predicate(((EncodeISOArrayNode*)n)->is_ascii());
16299   match(Set result (EncodeISOArray src (Binary dst len)));
16300   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16301 
16302   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16303   ins_encode %{
16304     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16305                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16306                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16307   %}
16308   ins_pipe( pipe_slow );
16309 %}
16310 
16311 //----------Overflow Math Instructions-----------------------------------------
16312 
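      // These rules produce only the condition-code result; the arithmetic value,
      // when needed, is matched separately by the ordinary Add/Sub/Mul rules.
      // Overflow* ideal nodes typically come from the java.lang.Math exact-arithmetic
      // intrinsics, roughly (illustrative Java, not part of this file):
      //
      //   int r = Math.addExact(a, b);   // value:  AddI(a, b)
      //                                  // check:  OverflowAddI(a, b) feeding a
      //                                  //         branch to the uncommon trap that
      //                                  //         throws ArithmeticException
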
16313 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16314 %{
16315   match(Set cr (OverflowAddI op1 op2));
16316   effect(DEF cr, USE_KILL op1, USE op2);
16317 
16318   format %{ "addl    $op1, $op2\t# overflow check int" %}
16319 
16320   ins_encode %{
16321     __ addl($op1$$Register, $op2$$Register);
16322   %}
16323   ins_pipe(ialu_reg_reg);
16324 %}
16325 
16326 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16327 %{
16328   match(Set cr (OverflowAddI op1 op2));
16329   effect(DEF cr, USE_KILL op1, USE op2);
16330 
16331   format %{ "addl    $op1, $op2\t# overflow check int" %}
16332 
16333   ins_encode %{
16334     __ addl($op1$$Register, $op2$$constant);
16335   %}
16336   ins_pipe(ialu_reg_reg);
16337 %}
16338 
16339 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16340 %{
16341   match(Set cr (OverflowAddL op1 op2));
16342   effect(DEF cr, USE_KILL op1, USE op2);
16343 
16344   format %{ "addq    $op1, $op2\t# overflow check long" %}
16345   ins_encode %{
16346     __ addq($op1$$Register, $op2$$Register);
16347   %}
16348   ins_pipe(ialu_reg_reg);
16349 %}
16350 
16351 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16352 %{
16353   match(Set cr (OverflowAddL op1 op2));
16354   effect(DEF cr, USE_KILL op1, USE op2);
16355 
16356   format %{ "addq    $op1, $op2\t# overflow check long" %}
16357   ins_encode %{
16358     __ addq($op1$$Register, $op2$$constant);
16359   %}
16360   ins_pipe(ialu_reg_reg);
16361 %}
16362 
16363 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16364 %{
16365   match(Set cr (OverflowSubI op1 op2));
16366 
16367   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16368   ins_encode %{
16369     __ cmpl($op1$$Register, $op2$$Register);
16370   %}
16371   ins_pipe(ialu_reg_reg);
16372 %}
16373 
16374 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16375 %{
16376   match(Set cr (OverflowSubI op1 op2));
16377 
16378   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16379   ins_encode %{
16380     __ cmpl($op1$$Register, $op2$$constant);
16381   %}
16382   ins_pipe(ialu_reg_reg);
16383 %}
16384 
16385 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16386 %{
16387   match(Set cr (OverflowSubL op1 op2));
16388 
16389   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16390   ins_encode %{
16391     __ cmpq($op1$$Register, $op2$$Register);
16392   %}
16393   ins_pipe(ialu_reg_reg);
16394 %}
16395 
16396 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16397 %{
16398   match(Set cr (OverflowSubL op1 op2));
16399 
16400   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16401   ins_encode %{
16402     __ cmpq($op1$$Register, $op2$$constant);
16403   %}
16404   ins_pipe(ialu_reg_reg);
16405 %}
16406 
16407 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16408 %{
16409   match(Set cr (OverflowSubI zero op2));
16410   effect(DEF cr, USE_KILL op2);
16411 
16412   format %{ "negl    $op2\t# overflow check int" %}
16413   ins_encode %{
16414     __ negl($op2$$Register);
16415   %}
16416   ins_pipe(ialu_reg_reg);
16417 %}
16418 
16419 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16420 %{
16421   match(Set cr (OverflowSubL zero op2));
16422   effect(DEF cr, USE_KILL op2);
16423 
16424   format %{ "negq    $op2\t# overflow check long" %}
16425   ins_encode %{
16426     __ negq($op2$$Register);
16427   %}
16428   ins_pipe(ialu_reg_reg);
16429 %}
16430 
16431 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16432 %{
16433   match(Set cr (OverflowMulI op1 op2));
16434   effect(DEF cr, USE_KILL op1, USE op2);
16435 
16436   format %{ "imull    $op1, $op2\t# overflow check int" %}
16437   ins_encode %{
16438     __ imull($op1$$Register, $op2$$Register);
16439   %}
16440   ins_pipe(ialu_reg_reg_alu0);
16441 %}
16442 
16443 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16444 %{
16445   match(Set cr (OverflowMulI op1 op2));
16446   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16447 
16448   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16449   ins_encode %{
16450     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16451   %}
16452   ins_pipe(ialu_reg_reg_alu0);
16453 %}
16454 
16455 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16456 %{
16457   match(Set cr (OverflowMulL op1 op2));
16458   effect(DEF cr, USE_KILL op1, USE op2);
16459 
16460   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16461   ins_encode %{
16462     __ imulq($op1$$Register, $op2$$Register);
16463   %}
16464   ins_pipe(ialu_reg_reg_alu0);
16465 %}
16466 
16467 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16468 %{
16469   match(Set cr (OverflowMulL op1 op2));
16470   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16471 
16472   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16473   ins_encode %{
16474     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16475   %}
16476   ins_pipe(ialu_reg_reg_alu0);
16477 %}
16478 
16479 
16480 //----------Control Flow Instructions------------------------------------------
16481 // Signed compare Instructions
16482 
16483 // XXX more variants!!
16484 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16485 %{
16486   match(Set cr (CmpI op1 op2));
16487   effect(DEF cr, USE op1, USE op2);
16488 
16489   format %{ "cmpl    $op1, $op2" %}
16490   ins_encode %{
16491     __ cmpl($op1$$Register, $op2$$Register);
16492   %}
16493   ins_pipe(ialu_cr_reg_reg);
16494 %}
16495 
16496 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16497 %{
16498   match(Set cr (CmpI op1 op2));
16499 
16500   format %{ "cmpl    $op1, $op2" %}
16501   ins_encode %{
16502     __ cmpl($op1$$Register, $op2$$constant);
16503   %}
16504   ins_pipe(ialu_cr_reg_imm);
16505 %}
16506 
16507 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16508 %{
16509   match(Set cr (CmpI op1 (LoadI op2)));
16510 
16511   ins_cost(500); // XXX
16512   format %{ "cmpl    $op1, $op2" %}
16513   ins_encode %{
16514     __ cmpl($op1$$Register, $op2$$Address);
16515   %}
16516   ins_pipe(ialu_cr_reg_mem);
16517 %}
16518 
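      // Compare-with-zero is matched to test: "test reg, reg" sets the same
      // ZF/SF (and clears CF/OF) as "cmp reg, 0" but has a shorter encoding.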
16519 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16520 %{
16521   match(Set cr (CmpI src zero));
16522 
16523   format %{ "testl   $src, $src" %}
16524   ins_encode %{
16525     __ testl($src$$Register, $src$$Register);
16526   %}
16527   ins_pipe(ialu_cr_reg_imm);
16528 %}
16529 
16530 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16531 %{
16532   match(Set cr (CmpI (AndI src con) zero));
16533 
16534   format %{ "testl   $src, $con" %}
16535   ins_encode %{
16536     __ testl($src$$Register, $con$$constant);
16537   %}
16538   ins_pipe(ialu_cr_reg_imm);
16539 %}
16540 
16541 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16542 %{
16543   match(Set cr (CmpI (AndI src1 src2) zero));
16544 
16545   format %{ "testl   $src1, $src2" %}
16546   ins_encode %{
16547     __ testl($src1$$Register, $src2$$Register);
16548   %}
16549   ins_pipe(ialu_cr_reg_imm);
16550 %}
16551 
16552 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16553 %{
16554   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16555 
16556   format %{ "testl   $src, $mem" %}
16557   ins_encode %{
16558     __ testl($src$$Register, $mem$$Address);
16559   %}
16560   ins_pipe(ialu_cr_reg_mem);
16561 %}
16562 
16563 // Unsigned compare Instructions; really, same as signed except they
16564 // produce an rFlagsRegU instead of rFlagsReg.
16565 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16566 %{
16567   match(Set cr (CmpU op1 op2));
16568 
16569   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16570   ins_encode %{
16571     __ cmpl($op1$$Register, $op2$$Register);
16572   %}
16573   ins_pipe(ialu_cr_reg_reg);
16574 %}
16575 
16576 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16577 %{
16578   match(Set cr (CmpU op1 op2));
16579 
16580   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16581   ins_encode %{
16582     __ cmpl($op1$$Register, $op2$$constant);
16583   %}
16584   ins_pipe(ialu_cr_reg_imm);
16585 %}
16586 
16587 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16588 %{
16589   match(Set cr (CmpU op1 (LoadI op2)));
16590 
16591   ins_cost(500); // XXX
16592   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16593   ins_encode %{
16594     __ cmpl($op1$$Register, $op2$$Address);
16595   %}
16596   ins_pipe(ialu_cr_reg_mem);
16597 %}
16598 
16599 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16600 %{
16601   match(Set cr (CmpU src zero));
16602 
16603   format %{ "testl   $src, $src\t# unsigned" %}
16604   ins_encode %{
16605     __ testl($src$$Register, $src$$Register);
16606   %}
16607   ins_pipe(ialu_cr_reg_imm);
16608 %}
16609 
16610 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16611 %{
16612   match(Set cr (CmpP op1 op2));
16613 
16614   format %{ "cmpq    $op1, $op2\t# ptr" %}
16615   ins_encode %{
16616     __ cmpq($op1$$Register, $op2$$Register);
16617   %}
16618   ins_pipe(ialu_cr_reg_reg);
16619 %}
16620 
16621 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16622 %{
16623   match(Set cr (CmpP op1 (LoadP op2)));
16624   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16625 
16626   ins_cost(500); // XXX
16627   format %{ "cmpq    $op1, $op2\t# ptr" %}
16628   ins_encode %{
16629     __ cmpq($op1$$Register, $op2$$Address);
16630   %}
16631   ins_pipe(ialu_cr_reg_mem);
16632 %}
16633 
16634 // XXX this is generalized by compP_rReg_mem???
16635 // Compare raw pointer (used in out-of-heap check).
16636 // Only works because non-oop pointers must be raw pointers
16637 // and raw pointers have no anti-dependencies.
16638 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16639 %{
16640   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16641             n->in(2)->as_Load()->barrier_data() == 0);
16642   match(Set cr (CmpP op1 (LoadP op2)));
16643 
16644   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16645   ins_encode %{
16646     __ cmpq($op1$$Register, $op2$$Address);
16647   %}
16648   ins_pipe(ialu_cr_reg_mem);
16649 %}
16650 
16651 // This will generate a signed flags result. This should be OK since
16652 // any compare to a zero should be eq/neq.
16653 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16654 %{
16655   match(Set cr (CmpP src zero));
16656 
16657   format %{ "testq   $src, $src\t# ptr" %}
16658   ins_encode %{
16659     __ testq($src$$Register, $src$$Register);
16660   %}
16661   ins_pipe(ialu_cr_reg_imm);
16662 %}
16663 
16664 // This will generate a signed flags result. This should be OK since
16665 // any compare to a zero should be eq/neq.
16666 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16667 %{
16668   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16669             n->in(1)->as_Load()->barrier_data() == 0);
16670   match(Set cr (CmpP (LoadP op) zero));
16671 
16672   ins_cost(500); // XXX
16673   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16674   ins_encode %{
16675     __ testq($op$$Address, 0xFFFFFFFF);
16676   %}
16677   ins_pipe(ialu_cr_reg_imm);
16678 %}
16679 
16680 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16681 %{
16682   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16683             n->in(1)->as_Load()->barrier_data() == 0);
16684   match(Set cr (CmpP (LoadP mem) zero));
16685 
16686   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16687   ins_encode %{
16688     __ cmpq(r12, $mem$$Address);
16689   %}
16690   ins_pipe(ialu_cr_reg_mem);
16691 %}
16692 
16693 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16694 %{
16695   match(Set cr (CmpN op1 op2));
16696 
16697   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16698   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16699   ins_pipe(ialu_cr_reg_reg);
16700 %}
16701 
16702 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16703 %{
16704   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16705   match(Set cr (CmpN src (LoadN mem)));
16706 
16707   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16708   ins_encode %{
16709     __ cmpl($src$$Register, $mem$$Address);
16710   %}
16711   ins_pipe(ialu_cr_reg_mem);
16712 %}
16713 
16714 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16715   match(Set cr (CmpN op1 op2));
16716 
16717   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16718   ins_encode %{
16719     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16720   %}
16721   ins_pipe(ialu_cr_reg_imm);
16722 %}
16723 
16724 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16725 %{
16726   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16727   match(Set cr (CmpN src (LoadN mem)));
16728 
16729   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16730   ins_encode %{
16731     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16732   %}
16733   ins_pipe(ialu_cr_reg_mem);
16734 %}
16735 
16736 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16737   match(Set cr (CmpN op1 op2));
16738 
16739   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16740   ins_encode %{
16741     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16742   %}
16743   ins_pipe(ialu_cr_reg_imm);
16744 %}
16745 
16746 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16747 %{
16748   predicate(!UseCompactObjectHeaders);
16749   match(Set cr (CmpN src (LoadNKlass mem)));
16750 
16751   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16752   ins_encode %{
16753     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16754   %}
16755   ins_pipe(ialu_cr_reg_mem);
16756 %}
16757 
16758 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16759   match(Set cr (CmpN src zero));
16760 
16761   format %{ "testl   $src, $src\t# compressed ptr" %}
16762   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16763   ins_pipe(ialu_cr_reg_imm);
16764 %}
16765 
16766 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16767 %{
16768   predicate(CompressedOops::base() != nullptr &&
16769             n->in(1)->as_Load()->barrier_data() == 0);
16770   match(Set cr (CmpN (LoadN mem) zero));
16771 
16772   ins_cost(500); // XXX
16773   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16774   ins_encode %{
16775     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16776   %}
16777   ins_pipe(ialu_cr_reg_mem);
16778 %}
16779 
16780 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16781 %{
16782   predicate(CompressedOops::base() == nullptr &&
16783             n->in(1)->as_Load()->barrier_data() == 0);
16784   match(Set cr (CmpN (LoadN mem) zero));
16785 
16786   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16787   ins_encode %{
16788     __ cmpl(r12, $mem$$Address);
16789   %}
16790   ins_pipe(ialu_cr_reg_mem);
16791 %}
16792 
16793 // Yanked all unsigned pointer compare operations.
16794 // Pointer compares are done with CmpP which is already unsigned.
16795 
16796 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16797 %{
16798   match(Set cr (CmpL op1 op2));
16799 
16800   format %{ "cmpq    $op1, $op2" %}
16801   ins_encode %{
16802     __ cmpq($op1$$Register, $op2$$Register);
16803   %}
16804   ins_pipe(ialu_cr_reg_reg);
16805 %}
16806 
16807 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16808 %{
16809   match(Set cr (CmpL op1 op2));
16810 
16811   format %{ "cmpq    $op1, $op2" %}
16812   ins_encode %{
16813     __ cmpq($op1$$Register, $op2$$constant);
16814   %}
16815   ins_pipe(ialu_cr_reg_imm);
16816 %}
16817 
16818 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16819 %{
16820   match(Set cr (CmpL op1 (LoadL op2)));
16821 
16822   format %{ "cmpq    $op1, $op2" %}
16823   ins_encode %{
16824     __ cmpq($op1$$Register, $op2$$Address);
16825   %}
16826   ins_pipe(ialu_cr_reg_mem);
16827 %}
16828 
16829 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16830 %{
16831   match(Set cr (CmpL src zero));
16832 
16833   format %{ "testq   $src, $src" %}
16834   ins_encode %{
16835     __ testq($src$$Register, $src$$Register);
16836   %}
16837   ins_pipe(ialu_cr_reg_imm);
16838 %}
16839 
16840 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16841 %{
16842   match(Set cr (CmpL (AndL src con) zero));
16843 
16844   format %{ "testq   $src, $con\t# long" %}
16845   ins_encode %{
16846     __ testq($src$$Register, $con$$constant);
16847   %}
16848   ins_pipe(ialu_cr_reg_imm);
16849 %}
16850 
16851 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16852 %{
16853   match(Set cr (CmpL (AndL src1 src2) zero));
16854 
16855   format %{ "testq   $src1, $src2\t# long" %}
16856   ins_encode %{
16857     __ testq($src1$$Register, $src2$$Register);
16858   %}
16859   ins_pipe(ialu_cr_reg_imm);
16860 %}
16861 
16862 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16863 %{
16864   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16865 
16866   format %{ "testq   $src, $mem" %}
16867   ins_encode %{
16868     __ testq($src$$Register, $mem$$Address);
16869   %}
16870   ins_pipe(ialu_cr_reg_mem);
16871 %}
16872 
16873 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16874 %{
16875   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16876 
16877   format %{ "testq   $src, $mem" %}
16878   ins_encode %{
16879     __ testq($src$$Register, $mem$$Address);
16880   %}
16881   ins_pipe(ialu_cr_reg_mem);
16882 %}
16883 
16884 // Manifest a CmpU result in an integer register.  Very painful.
16885 // This is the test to avoid.
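// The emitted sequence leaves -1 in $dst when src1 < src2 (unsigned),
// 0 when they are equal, and 1 when src1 > src2: the movl does not
// disturb the flags produced by the cmpl, the jb keeps the preloaded -1,
// and the setcc turns "not equal" into 0/1.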
16886 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16887 %{
16888   match(Set dst (CmpU3 src1 src2));
16889   effect(KILL flags);
16890 
16891   ins_cost(275); // XXX
16892   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16893             "movl    $dst, -1\n\t"
16894             "jb,u    done\n\t"
16895             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16896     "done:" %}
16897   ins_encode %{
16898     Label done;
16899     __ cmpl($src1$$Register, $src2$$Register);
16900     __ movl($dst$$Register, -1);
16901     __ jccb(Assembler::below, done);
16902     __ setcc(Assembler::notZero, $dst$$Register);
16903     __ bind(done);
16904   %}
16905   ins_pipe(pipe_slow);
16906 %}
16907 
16908 // Manifest a CmpL result in an integer register.  Very painful.
16909 // This is the test to avoid.
16910 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16911 %{
16912   match(Set dst (CmpL3 src1 src2));
16913   effect(KILL flags);
16914 
16915   ins_cost(275); // XXX
16916   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16917             "movl    $dst, -1\n\t"
16918             "jl,s    done\n\t"
16919             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16920     "done:" %}
16921   ins_encode %{
16922     Label done;
16923     __ cmpq($src1$$Register, $src2$$Register);
16924     __ movl($dst$$Register, -1);
16925     __ jccb(Assembler::less, done);
16926     __ setcc(Assembler::notZero, $dst$$Register);
16927     __ bind(done);
16928   %}
16929   ins_pipe(pipe_slow);
16930 %}
16931 
16932 // Manifest a CmpUL result in an integer register.  Very painful.
16933 // This is the test to avoid.
16934 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16935 %{
16936   match(Set dst (CmpUL3 src1 src2));
16937   effect(KILL flags);
16938 
16939   ins_cost(275); // XXX
16940   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16941             "movl    $dst, -1\n\t"
16942             "jb,u    done\n\t"
16943             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16944     "done:" %}
16945   ins_encode %{
16946     Label done;
16947     __ cmpq($src1$$Register, $src2$$Register);
16948     __ movl($dst$$Register, -1);
16949     __ jccb(Assembler::below, done);
16950     __ setcc(Assembler::notZero, $dst$$Register);
16951     __ bind(done);
16952   %}
16953   ins_pipe(pipe_slow);
16954 %}
16955 
// Unsigned long compare instructions; the same as the signed long compares
// except that they produce an rFlagsRegU instead of an rFlagsReg.
16958 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16959 %{
16960   match(Set cr (CmpUL op1 op2));
16961 
16962   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16963   ins_encode %{
16964     __ cmpq($op1$$Register, $op2$$Register);
16965   %}
16966   ins_pipe(ialu_cr_reg_reg);
16967 %}
16968 
16969 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16970 %{
16971   match(Set cr (CmpUL op1 op2));
16972 
16973   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16974   ins_encode %{
16975     __ cmpq($op1$$Register, $op2$$constant);
16976   %}
16977   ins_pipe(ialu_cr_reg_imm);
16978 %}
16979 
16980 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16981 %{
16982   match(Set cr (CmpUL op1 (LoadL op2)));
16983 
16984   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16985   ins_encode %{
16986     __ cmpq($op1$$Register, $op2$$Address);
16987   %}
16988   ins_pipe(ialu_cr_reg_mem);
16989 %}
16990 
16991 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16992 %{
16993   match(Set cr (CmpUL src zero));
16994 
16995   format %{ "testq   $src, $src\t# unsigned" %}
16996   ins_encode %{
16997     __ testq($src$$Register, $src$$Register);
16998   %}
16999   ins_pipe(ialu_cr_reg_imm);
17000 %}
17001 
17002 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
17003 %{
17004   match(Set cr (CmpI (LoadB mem) imm));
17005 
17006   ins_cost(125);
17007   format %{ "cmpb    $mem, $imm" %}
17008   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
17009   ins_pipe(ialu_cr_reg_mem);
17010 %}
17011 
17012 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
17013 %{
17014   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
17015 
17016   ins_cost(125);
17017   format %{ "testb   $mem, $imm\t# ubyte" %}
17018   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
17019   ins_pipe(ialu_cr_reg_mem);
17020 %}
17021 
17022 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
17023 %{
17024   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
17025 
17026   ins_cost(125);
17027   format %{ "testb   $mem, $imm\t# byte" %}
17028   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
17029   ins_pipe(ialu_cr_reg_mem);
17030 %}
17031 
17032 //----------Max and Min--------------------------------------------------------
17033 // Min Instructions
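// MinI/MaxI are expanded below into a compare followed by a conditional
// move; e.g. minI_rReg becomes "cmpl dst, src; cmovlgt dst, src", i.e.
// dst = (dst > src) ? src : dst.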
17034 
17035 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
17036 %{
17037   predicate(!UseAPX);
17038   effect(USE_DEF dst, USE src, USE cr);
17039 
17040   format %{ "cmovlgt $dst, $src\t# min" %}
17041   ins_encode %{
17042     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
17043   %}
17044   ins_pipe(pipe_cmov_reg);
17045 %}
17046 
17047 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17048 %{
17049   predicate(UseAPX);
17050   effect(DEF dst, USE src1, USE src2, USE cr);
17051 
17052   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
17053   ins_encode %{
17054     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
17055   %}
17056   ins_pipe(pipe_cmov_reg);
17057 %}
17058 
17059 instruct minI_rReg(rRegI dst, rRegI src)
17060 %{
17061   predicate(!UseAPX);
17062   match(Set dst (MinI dst src));
17063 
17064   ins_cost(200);
17065   expand %{
17066     rFlagsReg cr;
17067     compI_rReg(cr, dst, src);
17068     cmovI_reg_g(dst, src, cr);
17069   %}
17070 %}
17071 
17072 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17073 %{
17074   predicate(UseAPX);
17075   match(Set dst (MinI src1 src2));
17076   effect(DEF dst, USE src1, USE src2);
17077   flag(PD::Flag_ndd_demotable_opr1);
17078 
17079   ins_cost(200);
17080   expand %{
17081     rFlagsReg cr;
17082     compI_rReg(cr, src1, src2);
17083     cmovI_reg_g_ndd(dst, src1, src2, cr);
17084   %}
17085 %}
17086 
17087 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
17088 %{
17089   predicate(!UseAPX);
17090   effect(USE_DEF dst, USE src, USE cr);
17091 
17092   format %{ "cmovllt $dst, $src\t# max" %}
17093   ins_encode %{
17094     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
17095   %}
17096   ins_pipe(pipe_cmov_reg);
17097 %}
17098 
17099 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17100 %{
17101   predicate(UseAPX);
17102   effect(DEF dst, USE src1, USE src2, USE cr);
17103 
17104   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
17105   ins_encode %{
17106     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
17107   %}
17108   ins_pipe(pipe_cmov_reg);
17109 %}
17110 
17111 instruct maxI_rReg(rRegI dst, rRegI src)
17112 %{
17113   predicate(!UseAPX);
17114   match(Set dst (MaxI dst src));
17115 
17116   ins_cost(200);
17117   expand %{
17118     rFlagsReg cr;
17119     compI_rReg(cr, dst, src);
17120     cmovI_reg_l(dst, src, cr);
17121   %}
17122 %}
17123 
17124 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17125 %{
17126   predicate(UseAPX);
17127   match(Set dst (MaxI src1 src2));
17128   effect(DEF dst, USE src1, USE src2);
17129   flag(PD::Flag_ndd_demotable_opr1);
17130 
17131   ins_cost(200);
17132   expand %{
17133     rFlagsReg cr;
17134     compI_rReg(cr, src1, src2);
17135     cmovI_reg_l_ndd(dst, src1, src2, cr);
17136   %}
17137 %}
17138 
17139 // ============================================================================
17140 // Branch Instructions
17141 
17142 // Jump Direct - Label defines a relative address from JMP+1
17143 instruct jmpDir(label labl)
17144 %{
17145   match(Goto);
17146   effect(USE labl);
17147 
17148   ins_cost(300);
17149   format %{ "jmp     $labl" %}
17150   size(5);
17151   ins_encode %{
17152     Label* L = $labl$$label;
17153     __ jmp(*L, false); // Always long jump
17154   %}
17155   ins_pipe(pipe_jmp);
17156 %}
17157 
17158 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17159 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17160 %{
17161   match(If cop cr);
17162   effect(USE labl);
17163 
17164   ins_cost(300);
17165   format %{ "j$cop     $labl" %}
17166   size(6);
17167   ins_encode %{
17168     Label* L = $labl$$label;
17169     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17170   %}
17171   ins_pipe(pipe_jcc);
17172 %}
17173 
17174 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17175 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17176 %{
17177   match(CountedLoopEnd cop cr);
17178   effect(USE labl);
17179 
17180   ins_cost(300);
17181   format %{ "j$cop     $labl\t# loop end" %}
17182   size(6);
17183   ins_encode %{
17184     Label* L = $labl$$label;
17185     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17186   %}
17187   ins_pipe(pipe_jcc);
17188 %}
17189 
17190 // Jump Direct Conditional - using unsigned comparison
17191 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17192   match(If cop cmp);
17193   effect(USE labl);
17194 
17195   ins_cost(300);
17196   format %{ "j$cop,u   $labl" %}
17197   size(6);
17198   ins_encode %{
17199     Label* L = $labl$$label;
17200     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17201   %}
17202   ins_pipe(pipe_jcc);
17203 %}
17204 
17205 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17206   match(If cop cmp);
17207   effect(USE labl);
17208 
17209   ins_cost(200);
17210   format %{ "j$cop,u   $labl" %}
17211   size(6);
17212   ins_encode %{
17213     Label* L = $labl$$label;
17214     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17215   %}
17216   ins_pipe(pipe_jcc);
17217 %}
17218 
17219 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17220   match(If cop cmp);
17221   effect(USE labl);
17222 
17223   ins_cost(200);
17224   format %{ $$template
17225     if ($cop$$cmpcode == Assembler::notEqual) {
17226       $$emit$$"jp,u    $labl\n\t"
17227       $$emit$$"j$cop,u   $labl"
17228     } else {
17229       $$emit$$"jp,u    done\n\t"
17230       $$emit$$"j$cop,u   $labl\n\t"
17231       $$emit$$"done:"
17232     }
17233   %}
17234   ins_encode %{
17235     Label* l = $labl$$label;
17236     if ($cop$$cmpcode == Assembler::notEqual) {
17237       __ jcc(Assembler::parity, *l, false);
17238       __ jcc(Assembler::notEqual, *l, false);
17239     } else if ($cop$$cmpcode == Assembler::equal) {
17240       Label done;
17241       __ jccb(Assembler::parity, done);
17242       __ jcc(Assembler::equal, *l, false);
17243       __ bind(done);
17244     } else {
17245        ShouldNotReachHere();
17246     }
17247   %}
17248   ins_pipe(pipe_jcc);
17249 %}
17250 
17251 // Jump Direct Conditional - using signed and unsigned comparison
17252 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17253   match(If cop cmp);
17254   effect(USE labl);
17255 
17256   ins_cost(200);
17257   format %{ "j$cop,su   $labl" %}
17258   size(6);
17259   ins_encode %{
17260     Label* L = $labl$$label;
17261     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17262   %}
17263   ins_pipe(pipe_jcc);
17264 %}
17265 
17266 // ============================================================================
17267 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
17268 // superklass array for an instance of the superklass.  Set a hidden
17269 // internal cache on a hit (cache is checked with exposed code in
17270 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
17271 // encoding ALSO sets flags.
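// Illustrative sketch only (this is not the emitted code, and the HotSpot
// accessors are used loosely): the repne/scasq scan behaves roughly like
//
//   Array<Klass*>* ss = sub->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super) {
//       // hit: also store super into sub's secondary_super_cache
//       return 0;
//     }
//   }
//   return 1; // miss (any nonzero value)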
17272 
17273 instruct partialSubtypeCheck(rdi_RegP result,
17274                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17275                              rFlagsReg cr)
17276 %{
17277   match(Set result (PartialSubtypeCheck sub super));
17278   predicate(!UseSecondarySupersTable);
17279   effect(KILL rcx, KILL cr);
17280 
17281   ins_cost(1100);  // slightly larger than the next version
17282   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17283             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17284             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17285             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17286             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
17287             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17288             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
17289     "miss:\t" %}
17290 
17291   ins_encode %{
17292     Label miss;
17293     // NB: Callers may assume that, when $result is a valid register,
17294     // check_klass_subtype_slow_path_linear sets it to a nonzero
17295     // value.
17296     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17297                                             $rcx$$Register, $result$$Register,
17298                                             nullptr, &miss,
17299                                             /*set_cond_codes:*/ true);
17300     __ xorptr($result$$Register, $result$$Register);
17301     __ bind(miss);
17302   %}
17303 
17304   ins_pipe(pipe_slow);
17305 %}
17306 
17307 // ============================================================================
17308 // Two versions of hashtable-based partialSubtypeCheck, both used when
17309 // we need to search for a super class in the secondary supers array.
17310 // The first is used when we don't know _a priori_ the class being
17311 // searched for. The second, far more common, is used when we do know:
17312 // this is used for instanceof, checkcast, and any case where C2 can
17313 // determine it by constant propagation.
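// For example, an instanceof or checkcast against a class C2 sees as a
// constant uses partialSubtypeCheckConstSuper below, while a check whose
// superclass is only known at run time (say, through Class::isInstance on a
// non-constant Class) has to use partialSubtypeCheckVarSuper.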
17314 
17315 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17316                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17317                                        rFlagsReg cr)
17318 %{
17319   match(Set result (PartialSubtypeCheck sub super));
17320   predicate(UseSecondarySupersTable);
17321   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17322 
17323   ins_cost(1000);
17324   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17325 
17326   ins_encode %{
17327     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17329   %}
17330 
17331   ins_pipe(pipe_slow);
17332 %}
17333 
17334 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17335                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17336                                        rFlagsReg cr)
17337 %{
17338   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17339   predicate(UseSecondarySupersTable);
17340   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17341 
17342   ins_cost(700);  // smaller than the next version
17343   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17344 
17345   ins_encode %{
17346     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17347     if (InlineSecondarySupersTest) {
17348       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17349                                        $temp3$$Register, $temp4$$Register, $result$$Register,
17350                                        super_klass_slot);
17351     } else {
17352       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17353     }
17354   %}
17355 
17356   ins_pipe(pipe_slow);
17357 %}
17358 
17359 // ============================================================================
17360 // Branch Instructions -- short offset versions
17361 //
17362 // These instructions are used to replace jumps of a long offset (the default
17363 // match) with jumps of a shorter offset.  These instructions are all tagged
17364 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17365 // match rules in general matching.  Instead, the ADLC generates a conversion
17366 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short variant can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
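// For example, a conditional branch is 6 bytes in its long form (0x0F 0x8x
// plus a 32-bit displacement) but only 2 bytes as a short branch (0x7x plus
// an 8-bit displacement), so the replacement is only legal when the target
// lies within roughly +/-127 bytes of the end of the instruction.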
17370 
17371 // Jump Direct - Label defines a relative address from JMP+1
17372 instruct jmpDir_short(label labl) %{
17373   match(Goto);
17374   effect(USE labl);
17375 
17376   ins_cost(300);
17377   format %{ "jmp,s   $labl" %}
17378   size(2);
17379   ins_encode %{
17380     Label* L = $labl$$label;
17381     __ jmpb(*L);
17382   %}
17383   ins_pipe(pipe_jmp);
17384   ins_short_branch(1);
17385 %}
17386 
17387 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17388 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17389   match(If cop cr);
17390   effect(USE labl);
17391 
17392   ins_cost(300);
17393   format %{ "j$cop,s   $labl" %}
17394   size(2);
17395   ins_encode %{
17396     Label* L = $labl$$label;
17397     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17398   %}
17399   ins_pipe(pipe_jcc);
17400   ins_short_branch(1);
17401 %}
17402 
17403 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17404 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17405   match(CountedLoopEnd cop cr);
17406   effect(USE labl);
17407 
17408   ins_cost(300);
17409   format %{ "j$cop,s   $labl\t# loop end" %}
17410   size(2);
17411   ins_encode %{
17412     Label* L = $labl$$label;
17413     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17414   %}
17415   ins_pipe(pipe_jcc);
17416   ins_short_branch(1);
17417 %}
17418 
17419 // Jump Direct Conditional - using unsigned comparison
17420 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17421   match(If cop cmp);
17422   effect(USE labl);
17423 
17424   ins_cost(300);
17425   format %{ "j$cop,us  $labl" %}
17426   size(2);
17427   ins_encode %{
17428     Label* L = $labl$$label;
17429     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17430   %}
17431   ins_pipe(pipe_jcc);
17432   ins_short_branch(1);
17433 %}
17434 
17435 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17436   match(If cop cmp);
17437   effect(USE labl);
17438 
17439   ins_cost(300);
17440   format %{ "j$cop,us  $labl" %}
17441   size(2);
17442   ins_encode %{
17443     Label* L = $labl$$label;
17444     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17445   %}
17446   ins_pipe(pipe_jcc);
17447   ins_short_branch(1);
17448 %}
17449 
17450 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17451   match(If cop cmp);
17452   effect(USE labl);
17453 
17454   ins_cost(300);
17455   format %{ $$template
17456     if ($cop$$cmpcode == Assembler::notEqual) {
17457       $$emit$$"jp,u,s  $labl\n\t"
17458       $$emit$$"j$cop,u,s  $labl"
17459     } else {
17460       $$emit$$"jp,u,s  done\n\t"
17461       $$emit$$"j$cop,u,s  $labl\n\t"
17462       $$emit$$"done:"
17463     }
17464   %}
17465   size(4);
17466   ins_encode %{
17467     Label* l = $labl$$label;
17468     if ($cop$$cmpcode == Assembler::notEqual) {
17469       __ jccb(Assembler::parity, *l);
17470       __ jccb(Assembler::notEqual, *l);
17471     } else if ($cop$$cmpcode == Assembler::equal) {
17472       Label done;
17473       __ jccb(Assembler::parity, done);
17474       __ jccb(Assembler::equal, *l);
17475       __ bind(done);
17476     } else {
17477        ShouldNotReachHere();
17478     }
17479   %}
17480   ins_pipe(pipe_jcc);
17481   ins_short_branch(1);
17482 %}
17483 
17484 // Jump Direct Conditional - using signed and unsigned comparison
17485 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17486   match(If cop cmp);
17487   effect(USE labl);
17488 
17489   ins_cost(300);
17490   format %{ "j$cop,sus  $labl" %}
17491   size(2);
17492   ins_encode %{
17493     Label* L = $labl$$label;
17494     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17495   %}
17496   ins_pipe(pipe_jcc);
17497   ins_short_branch(1);
17498 %}
17499 
17500 // ============================================================================
17501 // inlined locking and unlocking
17502 
17503 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17504   match(Set cr (FastLock object box));
17505   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17506   ins_cost(300);
17507   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17508   ins_encode %{
17509     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17510   %}
17511   ins_pipe(pipe_slow);
17512 %}
17513 
17514 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17515   match(Set cr (FastUnlock object rax_reg));
17516   effect(TEMP tmp, USE_KILL rax_reg);
17517   ins_cost(300);
17518   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17519   ins_encode %{
17520     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17521   %}
17522   ins_pipe(pipe_slow);
17523 %}
17524 
17525 
17526 // ============================================================================
17527 // Safepoint Instructions
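// The $poll register holds the thread-local polling page address; when a
// safepoint (or handshake) is requested the page is armed, so the testl
// load below faults and the signal handler diverts the thread to the
// safepoint handler. The testl itself has no architectural side effect
// beyond the flags, which the rule declares as killed.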
17528 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17529 %{
17530   match(SafePoint poll);
17531   effect(KILL cr, USE poll);
17532 
17533   format %{ "testl   rax, [$poll]\t"
17534             "# Safepoint: poll for GC" %}
17535   ins_cost(125);
17536   ins_encode %{
17537     __ relocate(relocInfo::poll_type);
17538     address pre_pc = __ pc();
17539     __ testl(rax, Address($poll$$Register, 0));
17540     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17541   %}
17542   ins_pipe(ialu_reg_mem);
17543 %}
17544 
17545 instruct mask_all_evexL(kReg dst, rRegL src) %{
17546   match(Set dst (MaskAll src));
17547   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17548   ins_encode %{
17549     int mask_len = Matcher::vector_length(this);
17550     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17551   %}
17552   ins_pipe( pipe_slow );
17553 %}
17554 
17555 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17556   predicate(Matcher::vector_length(n) > 32);
17557   match(Set dst (MaskAll src));
17558   effect(TEMP tmp);
17559   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17560   ins_encode %{
17561     int mask_len = Matcher::vector_length(this);
17562     __ movslq($tmp$$Register, $src$$Register);
17563     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17564   %}
17565   ins_pipe( pipe_slow );
17566 %}
17567 
17568 // ============================================================================
17569 // Procedure Call/Return Instructions
17570 // Call Java Static Instruction
17571 // Note: If this code changes, the corresponding ret_addr_offset() and
17572 //       compute_padding() functions will have to be adjusted.
17573 instruct CallStaticJavaDirect(method meth) %{
17574   match(CallStaticJava);
17575   effect(USE meth);
17576 
17577   ins_cost(300);
17578   format %{ "call,static " %}
17579   opcode(0xE8); /* E8 cd */
17580   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17581   ins_pipe(pipe_slow);
17582   ins_alignment(4);
17583 %}
17584 
17585 // Call Java Dynamic Instruction
17586 // Note: If this code changes, the corresponding ret_addr_offset() and
17587 //       compute_padding() functions will have to be adjusted.
17588 instruct CallDynamicJavaDirect(method meth)
17589 %{
17590   match(CallDynamicJava);
17591   effect(USE meth);
17592 
17593   ins_cost(300);
17594   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17595             "call,dynamic " %}
17596   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17597   ins_pipe(pipe_slow);
17598   ins_alignment(4);
17599 %}
17600 
17601 // Call Runtime Instruction
17602 instruct CallRuntimeDirect(method meth)
17603 %{
17604   match(CallRuntime);
17605   effect(USE meth);
17606 
17607   ins_cost(300);
17608   format %{ "call,runtime " %}
17609   ins_encode(clear_avx, Java_To_Runtime(meth));
17610   ins_pipe(pipe_slow);
17611 %}
17612 
17613 // Call runtime without safepoint
17614 instruct CallLeafDirect(method meth)
17615 %{
17616   match(CallLeaf);
17617   effect(USE meth);
17618 
17619   ins_cost(300);
17620   format %{ "call_leaf,runtime " %}
17621   ins_encode(clear_avx, Java_To_Runtime(meth));
17622   ins_pipe(pipe_slow);
17623 %}
17624 
17625 // Call runtime without safepoint and with vector arguments
17626 instruct CallLeafDirectVector(method meth)
17627 %{
17628   match(CallLeafVector);
17629   effect(USE meth);
17630 
17631   ins_cost(300);
17632   format %{ "call_leaf,vector " %}
17633   ins_encode(Java_To_Runtime(meth));
17634   ins_pipe(pipe_slow);
17635 %}
17636 
17637 // Call runtime without safepoint
17638 // entry point is null, target holds the address to call
17639 instruct CallLeafNoFPInDirect(rRegP target)
17640 %{
17641   predicate(n->as_Call()->entry_point() == nullptr);
17642   match(CallLeafNoFP target);
17643 
17644   ins_cost(300);
17645   format %{ "call_leaf_nofp,runtime indirect " %}
17646   ins_encode %{
17647      __ call($target$$Register);
17648   %}
17649 
17650   ins_pipe(pipe_slow);
17651 %}
17652 
17653 // Call runtime without safepoint
17654 instruct CallLeafNoFPDirect(method meth)
17655 %{
17656   predicate(n->as_Call()->entry_point() != nullptr);
17657   match(CallLeafNoFP);
17658   effect(USE meth);
17659 
17660   ins_cost(300);
17661   format %{ "call_leaf_nofp,runtime " %}
17662   ins_encode(clear_avx, Java_To_Runtime(meth));
17663   ins_pipe(pipe_slow);
17664 %}
17665 
17666 // Return Instruction
17667 // Remove the return address & jump to it.
17668 // Notice: We always emit a nop after a ret to make sure there is room
17669 // for safepoint patching
17670 instruct Ret()
17671 %{
17672   match(Return);
17673 
17674   format %{ "ret" %}
17675   ins_encode %{
17676     __ ret(0);
17677   %}
17678   ins_pipe(pipe_jmp);
17679 %}
17680 
17681 // Tail Call; Jump from runtime stub to Java code.
17682 // Also known as an 'interprocedural jump'.
17683 // Target of jump will eventually return to caller.
17684 // TailJump below removes the return address.
17685 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17686 // emitted just above the TailCall which has reset rbp to the caller state.
17687 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17688 %{
17689   match(TailCall jump_target method_ptr);
17690 
17691   ins_cost(300);
17692   format %{ "jmp     $jump_target\t# rbx holds method" %}
17693   ins_encode %{
17694     __ jmp($jump_target$$Register);
17695   %}
17696   ins_pipe(pipe_jmp);
17697 %}
17698 
17699 // Tail Jump; remove the return address; jump to target.
17700 // TailCall above leaves the return address around.
17701 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17702 %{
17703   match(TailJump jump_target ex_oop);
17704 
17705   ins_cost(300);
17706   format %{ "popq    rdx\t# pop return address\n\t"
17707             "jmp     $jump_target" %}
17708   ins_encode %{
17709     __ popq(as_Register(RDX_enc));
17710     __ jmp($jump_target$$Register);
17711   %}
17712   ins_pipe(pipe_jmp);
17713 %}
17714 
17715 // Forward exception.
17716 instruct ForwardExceptionjmp()
17717 %{
17718   match(ForwardException);
17719 
17720   format %{ "jmp     forward_exception_stub" %}
17721   ins_encode %{
17722     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17723   %}
17724   ins_pipe(pipe_jmp);
17725 %}
17726 
17727 // Create exception oop: created by stack-crawling runtime code.
17728 // Created exception is now available to this handler, and is setup
17729 // just prior to jumping to this handler.  No code emitted.
17730 instruct CreateException(rax_RegP ex_oop)
17731 %{
17732   match(Set ex_oop (CreateEx));
17733 
17734   size(0);
17735   // use the following format syntax
17736   format %{ "# exception oop is in rax; no code emitted" %}
17737   ins_encode();
17738   ins_pipe(empty);
17739 %}
17740 
17741 // Rethrow exception:
17742 // The exception oop will come in the first argument position.
17743 // Then JUMP (not call) to the rethrow stub code.
17744 instruct RethrowException()
17745 %{
17746   match(Rethrow);
17747 
17748   // use the following format syntax
17749   format %{ "jmp     rethrow_stub" %}
17750   ins_encode %{
17751     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17752   %}
17753   ins_pipe(pipe_jmp);
17754 %}
17755 
17756 // ============================================================================
17757 // This name is KNOWN by the ADLC and cannot be changed.
17758 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17759 // for this guy.
17760 instruct tlsLoadP(r15_RegP dst) %{
17761   match(Set dst (ThreadLocal));
17762   effect(DEF dst);
17763 
17764   size(0);
17765   format %{ "# TLS is in R15" %}
17766   ins_encode( /*empty encoding*/ );
17767   ins_pipe(ialu_reg_reg);
17768 %}
17769 
17770 instruct addF_reg(regF dst, regF src) %{
17771   predicate(UseAVX == 0);
17772   match(Set dst (AddF dst src));
17773 
17774   format %{ "addss   $dst, $src" %}
17775   ins_cost(150);
17776   ins_encode %{
17777     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17778   %}
17779   ins_pipe(pipe_slow);
17780 %}
17781 
17782 instruct addF_mem(regF dst, memory src) %{
17783   predicate(UseAVX == 0);
17784   match(Set dst (AddF dst (LoadF src)));
17785 
17786   format %{ "addss   $dst, $src" %}
17787   ins_cost(150);
17788   ins_encode %{
17789     __ addss($dst$$XMMRegister, $src$$Address);
17790   %}
17791   ins_pipe(pipe_slow);
17792 %}
17793 
17794 instruct addF_imm(regF dst, immF con) %{
17795   predicate(UseAVX == 0);
17796   match(Set dst (AddF dst con));
17797   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17798   ins_cost(150);
17799   ins_encode %{
17800     __ addss($dst$$XMMRegister, $constantaddress($con));
17801   %}
17802   ins_pipe(pipe_slow);
17803 %}
17804 
17805 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17806   predicate(UseAVX > 0);
17807   match(Set dst (AddF src1 src2));
17808 
17809   format %{ "vaddss  $dst, $src1, $src2" %}
17810   ins_cost(150);
17811   ins_encode %{
17812     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17813   %}
17814   ins_pipe(pipe_slow);
17815 %}
17816 
17817 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17818   predicate(UseAVX > 0);
17819   match(Set dst (AddF src1 (LoadF src2)));
17820 
17821   format %{ "vaddss  $dst, $src1, $src2" %}
17822   ins_cost(150);
17823   ins_encode %{
17824     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17825   %}
17826   ins_pipe(pipe_slow);
17827 %}
17828 
17829 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17830   predicate(UseAVX > 0);
17831   match(Set dst (AddF src con));
17832 
17833   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17834   ins_cost(150);
17835   ins_encode %{
17836     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17837   %}
17838   ins_pipe(pipe_slow);
17839 %}
17840 
17841 instruct addD_reg(regD dst, regD src) %{
17842   predicate(UseAVX == 0);
17843   match(Set dst (AddD dst src));
17844 
17845   format %{ "addsd   $dst, $src" %}
17846   ins_cost(150);
17847   ins_encode %{
17848     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17849   %}
17850   ins_pipe(pipe_slow);
17851 %}
17852 
17853 instruct addD_mem(regD dst, memory src) %{
17854   predicate(UseAVX == 0);
17855   match(Set dst (AddD dst (LoadD src)));
17856 
17857   format %{ "addsd   $dst, $src" %}
17858   ins_cost(150);
17859   ins_encode %{
17860     __ addsd($dst$$XMMRegister, $src$$Address);
17861   %}
17862   ins_pipe(pipe_slow);
17863 %}
17864 
17865 instruct addD_imm(regD dst, immD con) %{
17866   predicate(UseAVX == 0);
17867   match(Set dst (AddD dst con));
17868   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17869   ins_cost(150);
17870   ins_encode %{
17871     __ addsd($dst$$XMMRegister, $constantaddress($con));
17872   %}
17873   ins_pipe(pipe_slow);
17874 %}
17875 
17876 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17877   predicate(UseAVX > 0);
17878   match(Set dst (AddD src1 src2));
17879 
17880   format %{ "vaddsd  $dst, $src1, $src2" %}
17881   ins_cost(150);
17882   ins_encode %{
17883     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17884   %}
17885   ins_pipe(pipe_slow);
17886 %}
17887 
17888 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17889   predicate(UseAVX > 0);
17890   match(Set dst (AddD src1 (LoadD src2)));
17891 
17892   format %{ "vaddsd  $dst, $src1, $src2" %}
17893   ins_cost(150);
17894   ins_encode %{
17895     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17896   %}
17897   ins_pipe(pipe_slow);
17898 %}
17899 
17900 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17901   predicate(UseAVX > 0);
17902   match(Set dst (AddD src con));
17903 
17904   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17905   ins_cost(150);
17906   ins_encode %{
17907     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17908   %}
17909   ins_pipe(pipe_slow);
17910 %}
17911 
17912 instruct subF_reg(regF dst, regF src) %{
17913   predicate(UseAVX == 0);
17914   match(Set dst (SubF dst src));
17915 
17916   format %{ "subss   $dst, $src" %}
17917   ins_cost(150);
17918   ins_encode %{
17919     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17920   %}
17921   ins_pipe(pipe_slow);
17922 %}
17923 
17924 instruct subF_mem(regF dst, memory src) %{
17925   predicate(UseAVX == 0);
17926   match(Set dst (SubF dst (LoadF src)));
17927 
17928   format %{ "subss   $dst, $src" %}
17929   ins_cost(150);
17930   ins_encode %{
17931     __ subss($dst$$XMMRegister, $src$$Address);
17932   %}
17933   ins_pipe(pipe_slow);
17934 %}
17935 
17936 instruct subF_imm(regF dst, immF con) %{
17937   predicate(UseAVX == 0);
17938   match(Set dst (SubF dst con));
17939   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17940   ins_cost(150);
17941   ins_encode %{
17942     __ subss($dst$$XMMRegister, $constantaddress($con));
17943   %}
17944   ins_pipe(pipe_slow);
17945 %}
17946 
17947 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17948   predicate(UseAVX > 0);
17949   match(Set dst (SubF src1 src2));
17950 
17951   format %{ "vsubss  $dst, $src1, $src2" %}
17952   ins_cost(150);
17953   ins_encode %{
17954     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17955   %}
17956   ins_pipe(pipe_slow);
17957 %}
17958 
17959 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17960   predicate(UseAVX > 0);
17961   match(Set dst (SubF src1 (LoadF src2)));
17962 
17963   format %{ "vsubss  $dst, $src1, $src2" %}
17964   ins_cost(150);
17965   ins_encode %{
17966     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17967   %}
17968   ins_pipe(pipe_slow);
17969 %}
17970 
17971 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17972   predicate(UseAVX > 0);
17973   match(Set dst (SubF src con));
17974 
17975   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17976   ins_cost(150);
17977   ins_encode %{
17978     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17979   %}
17980   ins_pipe(pipe_slow);
17981 %}
17982 
17983 instruct subD_reg(regD dst, regD src) %{
17984   predicate(UseAVX == 0);
17985   match(Set dst (SubD dst src));
17986 
17987   format %{ "subsd   $dst, $src" %}
17988   ins_cost(150);
17989   ins_encode %{
17990     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17991   %}
17992   ins_pipe(pipe_slow);
17993 %}
17994 
17995 instruct subD_mem(regD dst, memory src) %{
17996   predicate(UseAVX == 0);
17997   match(Set dst (SubD dst (LoadD src)));
17998 
17999   format %{ "subsd   $dst, $src" %}
18000   ins_cost(150);
18001   ins_encode %{
18002     __ subsd($dst$$XMMRegister, $src$$Address);
18003   %}
18004   ins_pipe(pipe_slow);
18005 %}
18006 
18007 instruct subD_imm(regD dst, immD con) %{
18008   predicate(UseAVX == 0);
18009   match(Set dst (SubD dst con));
18010   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18011   ins_cost(150);
18012   ins_encode %{
18013     __ subsd($dst$$XMMRegister, $constantaddress($con));
18014   %}
18015   ins_pipe(pipe_slow);
18016 %}
18017 
18018 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
18019   predicate(UseAVX > 0);
18020   match(Set dst (SubD src1 src2));
18021 
18022   format %{ "vsubsd  $dst, $src1, $src2" %}
18023   ins_cost(150);
18024   ins_encode %{
18025     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18026   %}
18027   ins_pipe(pipe_slow);
18028 %}
18029 
18030 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
18031   predicate(UseAVX > 0);
18032   match(Set dst (SubD src1 (LoadD src2)));
18033 
18034   format %{ "vsubsd  $dst, $src1, $src2" %}
18035   ins_cost(150);
18036   ins_encode %{
18037     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18038   %}
18039   ins_pipe(pipe_slow);
18040 %}
18041 
18042 instruct subD_reg_imm(regD dst, regD src, immD con) %{
18043   predicate(UseAVX > 0);
18044   match(Set dst (SubD src con));
18045 
18046   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18047   ins_cost(150);
18048   ins_encode %{
18049     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18050   %}
18051   ins_pipe(pipe_slow);
18052 %}
18053 
18054 instruct mulF_reg(regF dst, regF src) %{
18055   predicate(UseAVX == 0);
18056   match(Set dst (MulF dst src));
18057 
18058   format %{ "mulss   $dst, $src" %}
18059   ins_cost(150);
18060   ins_encode %{
18061     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
18062   %}
18063   ins_pipe(pipe_slow);
18064 %}
18065 
18066 instruct mulF_mem(regF dst, memory src) %{
18067   predicate(UseAVX == 0);
18068   match(Set dst (MulF dst (LoadF src)));
18069 
18070   format %{ "mulss   $dst, $src" %}
18071   ins_cost(150);
18072   ins_encode %{
18073     __ mulss($dst$$XMMRegister, $src$$Address);
18074   %}
18075   ins_pipe(pipe_slow);
18076 %}
18077 
18078 instruct mulF_imm(regF dst, immF con) %{
18079   predicate(UseAVX == 0);
18080   match(Set dst (MulF dst con));
18081   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18082   ins_cost(150);
18083   ins_encode %{
18084     __ mulss($dst$$XMMRegister, $constantaddress($con));
18085   %}
18086   ins_pipe(pipe_slow);
18087 %}
18088 
18089 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
18090   predicate(UseAVX > 0);
18091   match(Set dst (MulF src1 src2));
18092 
18093   format %{ "vmulss  $dst, $src1, $src2" %}
18094   ins_cost(150);
18095   ins_encode %{
18096     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18097   %}
18098   ins_pipe(pipe_slow);
18099 %}
18100 
18101 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
18102   predicate(UseAVX > 0);
18103   match(Set dst (MulF src1 (LoadF src2)));
18104 
18105   format %{ "vmulss  $dst, $src1, $src2" %}
18106   ins_cost(150);
18107   ins_encode %{
18108     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18109   %}
18110   ins_pipe(pipe_slow);
18111 %}
18112 
18113 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
18114   predicate(UseAVX > 0);
18115   match(Set dst (MulF src con));
18116 
18117   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18118   ins_cost(150);
18119   ins_encode %{
18120     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18121   %}
18122   ins_pipe(pipe_slow);
18123 %}
18124 
18125 instruct mulD_reg(regD dst, regD src) %{
18126   predicate(UseAVX == 0);
18127   match(Set dst (MulD dst src));
18128 
18129   format %{ "mulsd   $dst, $src" %}
18130   ins_cost(150);
18131   ins_encode %{
18132     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
18133   %}
18134   ins_pipe(pipe_slow);
18135 %}
18136 
18137 instruct mulD_mem(regD dst, memory src) %{
18138   predicate(UseAVX == 0);
18139   match(Set dst (MulD dst (LoadD src)));
18140 
18141   format %{ "mulsd   $dst, $src" %}
18142   ins_cost(150);
18143   ins_encode %{
18144     __ mulsd($dst$$XMMRegister, $src$$Address);
18145   %}
18146   ins_pipe(pipe_slow);
18147 %}
18148 
18149 instruct mulD_imm(regD dst, immD con) %{
18150   predicate(UseAVX == 0);
18151   match(Set dst (MulD dst con));
18152   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18153   ins_cost(150);
18154   ins_encode %{
18155     __ mulsd($dst$$XMMRegister, $constantaddress($con));
18156   %}
18157   ins_pipe(pipe_slow);
18158 %}
18159 
18160 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
18161   predicate(UseAVX > 0);
18162   match(Set dst (MulD src1 src2));
18163 
18164   format %{ "vmulsd  $dst, $src1, $src2" %}
18165   ins_cost(150);
18166   ins_encode %{
18167     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18168   %}
18169   ins_pipe(pipe_slow);
18170 %}
18171 
18172 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18173   predicate(UseAVX > 0);
18174   match(Set dst (MulD src1 (LoadD src2)));
18175 
18176   format %{ "vmulsd  $dst, $src1, $src2" %}
18177   ins_cost(150);
18178   ins_encode %{
18179     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18180   %}
18181   ins_pipe(pipe_slow);
18182 %}
18183 
18184 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18185   predicate(UseAVX > 0);
18186   match(Set dst (MulD src con));
18187 
18188   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18189   ins_cost(150);
18190   ins_encode %{
18191     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18192   %}
18193   ins_pipe(pipe_slow);
18194 %}
18195 
18196 instruct divF_reg(regF dst, regF src) %{
18197   predicate(UseAVX == 0);
18198   match(Set dst (DivF dst src));
18199 
18200   format %{ "divss   $dst, $src" %}
18201   ins_cost(150);
18202   ins_encode %{
18203     __ divss($dst$$XMMRegister, $src$$XMMRegister);
18204   %}
18205   ins_pipe(pipe_slow);
18206 %}
18207 
18208 instruct divF_mem(regF dst, memory src) %{
18209   predicate(UseAVX == 0);
18210   match(Set dst (DivF dst (LoadF src)));
18211 
18212   format %{ "divss   $dst, $src" %}
18213   ins_cost(150);
18214   ins_encode %{
18215     __ divss($dst$$XMMRegister, $src$$Address);
18216   %}
18217   ins_pipe(pipe_slow);
18218 %}
18219 
18220 instruct divF_imm(regF dst, immF con) %{
18221   predicate(UseAVX == 0);
18222   match(Set dst (DivF dst con));
18223   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18224   ins_cost(150);
18225   ins_encode %{
18226     __ divss($dst$$XMMRegister, $constantaddress($con));
18227   %}
18228   ins_pipe(pipe_slow);
18229 %}
18230 
18231 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18232   predicate(UseAVX > 0);
18233   match(Set dst (DivF src1 src2));
18234 
18235   format %{ "vdivss  $dst, $src1, $src2" %}
18236   ins_cost(150);
18237   ins_encode %{
18238     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18239   %}
18240   ins_pipe(pipe_slow);
18241 %}
18242 
18243 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18244   predicate(UseAVX > 0);
18245   match(Set dst (DivF src1 (LoadF src2)));
18246 
18247   format %{ "vdivss  $dst, $src1, $src2" %}
18248   ins_cost(150);
18249   ins_encode %{
18250     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18251   %}
18252   ins_pipe(pipe_slow);
18253 %}
18254 
18255 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18256   predicate(UseAVX > 0);
18257   match(Set dst (DivF src con));
18258 
18259   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18260   ins_cost(150);
18261   ins_encode %{
18262     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18263   %}
18264   ins_pipe(pipe_slow);
18265 %}
18266 
18267 instruct divD_reg(regD dst, regD src) %{
18268   predicate(UseAVX == 0);
18269   match(Set dst (DivD dst src));
18270 
18271   format %{ "divsd   $dst, $src" %}
18272   ins_cost(150);
18273   ins_encode %{
18274     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18275   %}
18276   ins_pipe(pipe_slow);
18277 %}
18278 
18279 instruct divD_mem(regD dst, memory src) %{
18280   predicate(UseAVX == 0);
18281   match(Set dst (DivD dst (LoadD src)));
18282 
18283   format %{ "divsd   $dst, $src" %}
18284   ins_cost(150);
18285   ins_encode %{
18286     __ divsd($dst$$XMMRegister, $src$$Address);
18287   %}
18288   ins_pipe(pipe_slow);
18289 %}
18290 
18291 instruct divD_imm(regD dst, immD con) %{
18292   predicate(UseAVX == 0);
18293   match(Set dst (DivD dst con));
18294   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18295   ins_cost(150);
18296   ins_encode %{
18297     __ divsd($dst$$XMMRegister, $constantaddress($con));
18298   %}
18299   ins_pipe(pipe_slow);
18300 %}
18301 
18302 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18303   predicate(UseAVX > 0);
18304   match(Set dst (DivD src1 src2));
18305 
18306   format %{ "vdivsd  $dst, $src1, $src2" %}
18307   ins_cost(150);
18308   ins_encode %{
18309     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18310   %}
18311   ins_pipe(pipe_slow);
18312 %}
18313 
18314 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18315   predicate(UseAVX > 0);
18316   match(Set dst (DivD src1 (LoadD src2)));
18317 
18318   format %{ "vdivsd  $dst, $src1, $src2" %}
18319   ins_cost(150);
18320   ins_encode %{
18321     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18322   %}
18323   ins_pipe(pipe_slow);
18324 %}
18325 
18326 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18327   predicate(UseAVX > 0);
18328   match(Set dst (DivD src con));
18329 
18330   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18331   ins_cost(150);
18332   ins_encode %{
18333     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18334   %}
18335   ins_pipe(pipe_slow);
18336 %}
18337 
18338 instruct absF_reg(regF dst) %{
18339   predicate(UseAVX == 0);
18340   match(Set dst (AbsF dst));
18341   ins_cost(150);
18342   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18343   ins_encode %{
18344     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18345   %}
18346   ins_pipe(pipe_slow);
18347 %}
18348 
18349 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18350   predicate(UseAVX > 0);
18351   match(Set dst (AbsF src));
18352   ins_cost(150);
18353   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18354   ins_encode %{
18355     int vlen_enc = Assembler::AVX_128bit;
18356     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18357               ExternalAddress(float_signmask()), vlen_enc);
18358   %}
18359   ins_pipe(pipe_slow);
18360 %}
18361 
18362 instruct absD_reg(regD dst) %{
18363   predicate(UseAVX == 0);
18364   match(Set dst (AbsD dst));
18365   ins_cost(150);
18366   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18367             "# abs double by sign masking" %}
18368   ins_encode %{
18369     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18370   %}
18371   ins_pipe(pipe_slow);
18372 %}
18373 
18374 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18375   predicate(UseAVX > 0);
18376   match(Set dst (AbsD src));
18377   ins_cost(150);
18378   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18379             "# abs double by sign masking" %}
18380   ins_encode %{
18381     int vlen_enc = Assembler::AVX_128bit;
18382     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18383               ExternalAddress(double_signmask()), vlen_enc);
18384   %}
18385   ins_pipe(pipe_slow);
18386 %}
18387 
18388 instruct negF_reg(regF dst) %{
18389   predicate(UseAVX == 0);
18390   match(Set dst (NegF dst));
18391   ins_cost(150);
18392   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18393   ins_encode %{
18394     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18395   %}
18396   ins_pipe(pipe_slow);
18397 %}
18398 
18399 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18400   predicate(UseAVX > 0);
18401   match(Set dst (NegF src));
18402   ins_cost(150);
18403   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18404   ins_encode %{
18405     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18406                  ExternalAddress(float_signflip()));
18407   %}
18408   ins_pipe(pipe_slow);
18409 %}
18410 
18411 instruct negD_reg(regD dst) %{
18412   predicate(UseAVX == 0);
18413   match(Set dst (NegD dst));
18414   ins_cost(150);
18415   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18416             "# neg double by sign flipping" %}
18417   ins_encode %{
18418     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18419   %}
18420   ins_pipe(pipe_slow);
18421 %}
18422 
18423 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18424   predicate(UseAVX > 0);
18425   match(Set dst (NegD src));
18426   ins_cost(150);
18427   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18428             "# neg double by sign flipping" %}
18429   ins_encode %{
18430     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18431                  ExternalAddress(double_signflip()));
18432   %}
18433   ins_pipe(pipe_slow);
18434 %}
18435 
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18438 instruct sqrtF_reg(regF dst) %{
18439   match(Set dst (SqrtF dst));
18440   format %{ "sqrtss  $dst, $dst" %}
18441   ins_encode %{
18442     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18443   %}
18444   ins_pipe(pipe_slow);
18445 %}
18446 
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18449 instruct sqrtD_reg(regD dst) %{
18450   match(Set dst (SqrtD dst));
18451   format %{ "sqrtsd  $dst, $dst" %}
18452   ins_encode %{
18453     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18454   %}
18455   ins_pipe(pipe_slow);
18456 %}
18457 
18458 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18459   effect(TEMP tmp);
18460   match(Set dst (ConvF2HF src));
18461   ins_cost(125);
18462   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18463   ins_encode %{
18464     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18465   %}
18466   ins_pipe( pipe_slow );
18467 %}
18468 
18469 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18470   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18471   effect(TEMP ktmp, TEMP rtmp);
18472   match(Set mem (StoreC mem (ConvF2HF src)));
18473   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18474   ins_encode %{
18475     __ movl($rtmp$$Register, 0x1);
18476     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18477     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18478   %}
18479   ins_pipe( pipe_slow );
18480 %}
18481 
18482 instruct vconvF2HF(vec dst, vec src) %{
18483   match(Set dst (VectorCastF2HF src));
18484   format %{ "vector_conv_F2HF $dst $src" %}
18485   ins_encode %{
18486     int vlen_enc = vector_length_encoding(this, $src);
18487     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18488   %}
18489   ins_pipe( pipe_slow );
18490 %}
18491 
18492 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18493   predicate(n->as_StoreVector()->memory_size() >= 16);
18494   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18495   format %{ "vcvtps2ph $mem,$src" %}
18496   ins_encode %{
18497     int vlen_enc = vector_length_encoding(this, $src);
18498     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18499   %}
18500   ins_pipe( pipe_slow );
18501 %}
18502 
18503 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18504   match(Set dst (ConvHF2F src));
18505   format %{ "vcvtph2ps $dst,$src" %}
18506   ins_encode %{
18507     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18508   %}
18509   ins_pipe( pipe_slow );
18510 %}
18511 
18512 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18513   match(Set dst (VectorCastHF2F (LoadVector mem)));
18514   format %{ "vcvtph2ps $dst,$mem" %}
18515   ins_encode %{
18516     int vlen_enc = vector_length_encoding(this);
18517     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18518   %}
18519   ins_pipe( pipe_slow );
18520 %}
18521 
18522 instruct vconvHF2F(vec dst, vec src) %{
18523   match(Set dst (VectorCastHF2F src));
18524   ins_cost(125);
18525   format %{ "vector_conv_HF2F $dst,$src" %}
18526   ins_encode %{
18527     int vlen_enc = vector_length_encoding(this);
18528     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18529   %}
18530   ins_pipe( pipe_slow );
18531 %}
18532 
18533 // ---------------------------------------- VectorReinterpret ------------------------------------
18534 instruct reinterpret_mask(kReg dst) %{
18535   predicate(n->bottom_type()->isa_vectmask() &&
18536             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18537   match(Set dst (VectorReinterpret dst));
18538   ins_cost(125);
18539   format %{ "vector_reinterpret $dst\t!" %}
18540   ins_encode %{
18541     // empty
18542   %}
18543   ins_pipe( pipe_slow );
18544 %}
18545 
18546 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18547   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18548             n->bottom_type()->isa_vectmask() &&
18549             n->in(1)->bottom_type()->isa_vectmask() &&
18550             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst have the same size in bytes
18552   match(Set dst (VectorReinterpret src));
18553   effect(TEMP xtmp);
18554   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18555   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18562   %}
18563   ins_pipe( pipe_slow );
18564 %}
18565 
18566 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18567   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18568             n->bottom_type()->isa_vectmask() &&
18569             n->in(1)->bottom_type()->isa_vectmask() &&
18570             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18571              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst have the same size in bytes
18573   match(Set dst (VectorReinterpret src));
18574   effect(TEMP xtmp);
18575   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18576   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18583   %}
18584   ins_pipe( pipe_slow );
18585 %}
18586 
18587 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18588   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18589             n->bottom_type()->isa_vectmask() &&
18590             n->in(1)->bottom_type()->isa_vectmask() &&
18591             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18592              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst have the same size in bytes
18594   match(Set dst (VectorReinterpret src));
18595   effect(TEMP xtmp);
18596   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18597   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18604   %}
18605   ins_pipe( pipe_slow );
18606 %}
18607 
18608 instruct reinterpret(vec dst) %{
18609   predicate(!n->bottom_type()->isa_vectmask() &&
18610             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18611   match(Set dst (VectorReinterpret dst));
18612   ins_cost(125);
18613   format %{ "vector_reinterpret $dst\t!" %}
18614   ins_encode %{
18615     // empty
18616   %}
18617   ins_pipe( pipe_slow );
18618 %}
18619 
18620 instruct reinterpret_expand(vec dst, vec src) %{
18621   predicate(UseAVX == 0 &&
18622             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18623   match(Set dst (VectorReinterpret src));
18624   ins_cost(125);
18625   effect(TEMP dst);
18626   format %{ "vector_reinterpret_expand $dst,$src" %}
18627   ins_encode %{
18628     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18629     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18630 
18631     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18632     if (src_vlen_in_bytes == 4) {
18633       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18634     } else {
18635       assert(src_vlen_in_bytes == 8, "");
18636       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18637     }
18638     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18639   %}
18640   ins_pipe( pipe_slow );
18641 %}
18642 
18643 instruct vreinterpret_expand4(legVec dst, vec src) %{
18644   predicate(UseAVX > 0 &&
18645             !n->bottom_type()->isa_vectmask() &&
18646             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18647             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18648   match(Set dst (VectorReinterpret src));
18649   ins_cost(125);
18650   format %{ "vector_reinterpret_expand $dst,$src" %}
18651   ins_encode %{
18652     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18653   %}
18654   ins_pipe( pipe_slow );
18655 %}
18656 
18657 
18658 instruct vreinterpret_expand(legVec dst, vec src) %{
18659   predicate(UseAVX > 0 &&
18660             !n->bottom_type()->isa_vectmask() &&
18661             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18662             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18663   match(Set dst (VectorReinterpret src));
18664   ins_cost(125);
18665   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18666   ins_encode %{
18667     switch (Matcher::vector_length_in_bytes(this, $src)) {
18668       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18669       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18670       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18671       default: ShouldNotReachHere();
18672     }
18673   %}
18674   ins_pipe( pipe_slow );
18675 %}
18676 
18677 instruct reinterpret_shrink(vec dst, legVec src) %{
18678   predicate(!n->bottom_type()->isa_vectmask() &&
18679             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18680   match(Set dst (VectorReinterpret src));
18681   ins_cost(125);
18682   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18683   ins_encode %{
18684     switch (Matcher::vector_length_in_bytes(this)) {
18685       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18686       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18687       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18688       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18689       default: ShouldNotReachHere();
18690     }
18691   %}
18692   ins_pipe( pipe_slow );
18693 %}
18694 
18695 // ----------------------------------------------------------------------------------------------------
18696 
18697 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18698   match(Set dst (RoundDoubleMode src rmode));
18699   format %{ "roundsd $dst,$src" %}
18700   ins_cost(150);
18701   ins_encode %{
18702     assert(UseSSE >= 4, "required");
18703     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18704       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18705     }
18706     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18707   %}
18708   ins_pipe(pipe_slow);
18709 %}
18710 
18711 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18712   match(Set dst (RoundDoubleMode con rmode));
18713   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18714   ins_cost(150);
18715   ins_encode %{
18716     assert(UseSSE >= 4, "required");
18717     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18718   %}
18719   ins_pipe(pipe_slow);
18720 %}
18721 
18722 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18723   predicate(Matcher::vector_length(n) < 8);
18724   match(Set dst (RoundDoubleModeV src rmode));
18725   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18726   ins_encode %{
18727     assert(UseAVX > 0, "required");
18728     int vlen_enc = vector_length_encoding(this);
18729     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18730   %}
18731   ins_pipe( pipe_slow );
18732 %}
18733 
18734 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18735   predicate(Matcher::vector_length(n) == 8);
18736   match(Set dst (RoundDoubleModeV src rmode));
18737   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18738   ins_encode %{
18739     assert(UseAVX > 2, "required");
18740     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18741   %}
18742   ins_pipe( pipe_slow );
18743 %}
18744 
18745 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18746   predicate(Matcher::vector_length(n) < 8);
18747   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18748   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18749   ins_encode %{
18750     assert(UseAVX > 0, "required");
18751     int vlen_enc = vector_length_encoding(this);
18752     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18753   %}
18754   ins_pipe( pipe_slow );
18755 %}
18756 
18757 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18758   predicate(Matcher::vector_length(n) == 8);
18759   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18760   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18761   ins_encode %{
18762     assert(UseAVX > 2, "required");
18763     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18764   %}
18765   ins_pipe( pipe_slow );
18766 %}
18767 
18768 instruct onspinwait() %{
18769   match(OnSpinWait);
18770   ins_cost(200);
18771 
18772   format %{
18773     $$template
18774     $$emit$$"pause\t! membar_onspinwait"
18775   %}
18776   ins_encode %{
18777     __ pause();
18778   %}
18779   ins_pipe(pipe_slow);
18780 %}
18781 
18782 // a * b + c
18783 instruct fmaD_reg(regD a, regD b, regD c) %{
18784   match(Set c (FmaD  c (Binary a b)));
18785   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18786   ins_cost(150);
18787   ins_encode %{
18788     assert(UseFMA, "Needs FMA instructions support.");
18789     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18790   %}
18791   ins_pipe( pipe_slow );
18792 %}
18793 
18794 // a * b + c
18795 instruct fmaF_reg(regF a, regF b, regF c) %{
18796   match(Set c (FmaF  c (Binary a b)));
18797   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18798   ins_cost(150);
18799   ins_encode %{
18800     assert(UseFMA, "Needs FMA instructions support.");
18801     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18802   %}
18803   ins_pipe( pipe_slow );
18804 %}
18805 
18806 // ====================VECTOR INSTRUCTIONS=====================================
18807 
18808 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
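// These patterns only exist so values can move between the vec and legVec
// operand classes during matching and allocation; since they are elided
// before code emission, the encoding deliberately traps with
// ShouldNotReachHere().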
18809 instruct MoveVec2Leg(legVec dst, vec src) %{
18810   match(Set dst src);
18811   format %{ "" %}
18812   ins_encode %{
18813     ShouldNotReachHere();
18814   %}
18815   ins_pipe( fpu_reg_reg );
18816 %}
18817 
18818 instruct MoveLeg2Vec(vec dst, legVec src) %{
18819   match(Set dst src);
18820   format %{ "" %}
18821   ins_encode %{
18822     ShouldNotReachHere();
18823   %}
18824   ins_pipe( fpu_reg_reg );
18825 %}
18826 
18827 // ============================================================================
18828 
18829 // Load vectors generic operand pattern
18830 instruct loadV(vec dst, memory mem) %{
18831   match(Set dst (LoadVector mem));
18832   ins_cost(125);
18833   format %{ "load_vector $dst,$mem" %}
18834   ins_encode %{
18835     BasicType bt = Matcher::vector_element_basic_type(this);
18836     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18837   %}
18838   ins_pipe( pipe_slow );
18839 %}
18840 
18841 // Store vectors generic operand pattern.
18842 instruct storeV(memory mem, vec src) %{
18843   match(Set mem (StoreVector mem src));
18844   ins_cost(145);
18845   format %{ "store_vector $mem,$src\n\t" %}
18846   ins_encode %{
18847     switch (Matcher::vector_length_in_bytes(this, $src)) {
18848       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18849       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18850       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18851       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18852       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18853       default: ShouldNotReachHere();
18854     }
18855   %}
18856   ins_pipe( pipe_slow );
18857 %}
18858 
18859 // ---------------------------------------- Gather ------------------------------------
18860 
18861 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
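// Non-subword gathers (INT, LONG, FLOAT, DOUBLE) use the hardware gather forms:
// an all-ones vector mask (vpcmpeqd) or opmask (kxnorwl) requests every lane,
// and the effective address is first materialized into a temporary register
// with lea. Subword gathers (BYTE, SHORT) have no hardware form and are
// emulated element by element via vgather8b*/vgather_subword.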
18862 
18863 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18864   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18865             Matcher::vector_length_in_bytes(n) <= 32);
18866   match(Set dst (LoadVectorGather mem idx));
18867   effect(TEMP dst, TEMP tmp, TEMP mask);
18868   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18869   ins_encode %{
18870     int vlen_enc = vector_length_encoding(this);
18871     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18872     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18873     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18874     __ lea($tmp$$Register, $mem$$Address);
18875     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18876   %}
18877   ins_pipe( pipe_slow );
18878 %}
18879 
18880 
18881 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18882   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18883             !is_subword_type(Matcher::vector_element_basic_type(n)));
18884   match(Set dst (LoadVectorGather mem idx));
18885   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18886   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18887   ins_encode %{
18888     int vlen_enc = vector_length_encoding(this);
18889     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18890     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18891     __ lea($tmp$$Register, $mem$$Address);
18892     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18893   %}
18894   ins_pipe( pipe_slow );
18895 %}
18896 
18897 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18898   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18899             !is_subword_type(Matcher::vector_element_basic_type(n)));
18900   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18901   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18902   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18903   ins_encode %{
18904     assert(UseAVX > 2, "sanity");
18905     int vlen_enc = vector_length_encoding(this);
18906     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18907     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register used
    // for predication, the mask operand is first copied to a temporary register.
18910     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18911     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18912     __ lea($tmp$$Register, $mem$$Address);
18913     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18914   %}
18915   ins_pipe( pipe_slow );
18916 %}
18917 
18918 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18919   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18920   match(Set dst (LoadVectorGather mem idx_base));
18921   effect(TEMP tmp, TEMP rtmp);
18922   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18923   ins_encode %{
18924     int vlen_enc = vector_length_encoding(this);
18925     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18926     __ lea($tmp$$Register, $mem$$Address);
18927     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18928   %}
18929   ins_pipe( pipe_slow );
18930 %}
18931 
18932 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18933                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18934   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18935   match(Set dst (LoadVectorGather mem idx_base));
18936   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18937   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18938   ins_encode %{
18939     int vlen_enc = vector_length_encoding(this);
18940     int vector_len = Matcher::vector_length(this);
18941     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18942     __ lea($tmp$$Register, $mem$$Address);
18943     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18944     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18945                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18946   %}
18947   ins_pipe( pipe_slow );
18948 %}
18949 
18950 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18951   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18952   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18953   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18954   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18955   ins_encode %{
18956     int vlen_enc = vector_length_encoding(this);
18957     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18958     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18959     __ lea($tmp$$Register, $mem$$Address);
18960     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18961     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18962   %}
18963   ins_pipe( pipe_slow );
18964 %}
18965 
18966 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18967                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18968   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18969   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18970   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18971   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18972   ins_encode %{
18973     int vlen_enc = vector_length_encoding(this);
18974     int vector_len = Matcher::vector_length(this);
18975     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18976     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18977     __ lea($tmp$$Register, $mem$$Address);
18978     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18979     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18980     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18981                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18982   %}
18983   ins_pipe( pipe_slow );
18984 %}
18985 
18986 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18987   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18988   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18989   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18990   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18991   ins_encode %{
18992     int vlen_enc = vector_length_encoding(this);
18993     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18994     __ lea($tmp$$Register, $mem$$Address);
18995     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18996     if (elem_bt == T_SHORT) {
18997       __ movl($mask_idx$$Register, 0x55555555);
18998       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18999     }
19000     __ xorl($mask_idx$$Register, $mask_idx$$Register);
19001     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
19002   %}
19003   ins_pipe( pipe_slow );
19004 %}
19005 
19006 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
19007                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
19008   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
19009   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
19010   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
19011   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
19012   ins_encode %{
19013     int vlen_enc = vector_length_encoding(this);
19014     int vector_len = Matcher::vector_length(this);
19015     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19016     __ lea($tmp$$Register, $mem$$Address);
19017     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
19018     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
19019     if (elem_bt == T_SHORT) {
19020       __ movl($mask_idx$$Register, 0x55555555);
19021       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
19022     }
19023     __ xorl($mask_idx$$Register, $mask_idx$$Register);
19024     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
19025                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
19026   %}
19027   ins_pipe( pipe_slow );
19028 %}
19029 
19030 // ====================Scatter=======================================
19031 
19032 // Scatter INT, LONG, FLOAT, DOUBLE
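// Scatters exist only as AVX-512 instructions, so both rules below funnel into
// evscatter under an opmask: an all-bits-set mask for the unpredicated form,
// or a copy of the user-supplied mask for the masked form.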
19033 
19034 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
19035   predicate(UseAVX > 2);
19036   match(Set mem (StoreVectorScatter mem (Binary src idx)));
19037   effect(TEMP tmp, TEMP ktmp);
19038   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
19039   ins_encode %{
19040     int vlen_enc = vector_length_encoding(this, $src);
19041     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19042 
19043     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19044     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19045 
19046     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
19047     __ lea($tmp$$Register, $mem$$Address);
19048     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19049   %}
19050   ins_pipe( pipe_slow );
19051 %}
19052 
19053 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
19054   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
19055   effect(TEMP tmp, TEMP ktmp);
19056   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
19057   ins_encode %{
19058     int vlen_enc = vector_length_encoding(this, $src);
19059     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19060     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19061     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is first copied to a temporary register.
19064     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
19065     __ lea($tmp$$Register, $mem$$Address);
19066     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19067   %}
19068   ins_pipe( pipe_slow );
19069 %}
19070 
19071 // ====================REPLICATE=======================================
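// The replicate rules pick the cheapest broadcast available: a direct
// GPR-to-vector broadcast (evpbroadcast*) when AVX512VL/BW applies or the
// vector is 512 bits wide, a reg-reg vpbroadcast*/vbroadcasts* on AVX2, and
// movd/punpck/pshuf shuffle sequences as the SSE fallback. Immediates are
// materialized through the constant table with load_constant_vector, except
// zero (pxor/vpxor) and all-ones (vallones), which are synthesized directly.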
19072 
19073 // Replicate byte scalar to be vector
19074 instruct vReplB_reg(vec dst, rRegI src) %{
19075   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
19076   match(Set dst (Replicate src));
19077   format %{ "replicateB $dst,$src" %}
19078   ins_encode %{
19079     uint vlen = Matcher::vector_length(this);
19080     if (UseAVX >= 2) {
19081       int vlen_enc = vector_length_encoding(this);
19082       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19083         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
19084         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
19085       } else {
19086         __ movdl($dst$$XMMRegister, $src$$Register);
19087         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19088       }
19089     } else {
19090        assert(UseAVX < 2, "");
19091       __ movdl($dst$$XMMRegister, $src$$Register);
19092       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
19093       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19094       if (vlen >= 16) {
19095         assert(vlen == 16, "");
19096         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19097       }
19098     }
19099   %}
19100   ins_pipe( pipe_slow );
19101 %}
19102 
19103 instruct ReplB_mem(vec dst, memory mem) %{
19104   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
19105   match(Set dst (Replicate (LoadB mem)));
19106   format %{ "replicateB $dst,$mem" %}
19107   ins_encode %{
19108     int vlen_enc = vector_length_encoding(this);
19109     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
19110   %}
19111   ins_pipe( pipe_slow );
19112 %}
19113 
19114 // ====================ReplicateS=======================================
19115 
19116 instruct vReplS_reg(vec dst, rRegI src) %{
19117   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
19118   match(Set dst (Replicate src));
19119   format %{ "replicateS $dst,$src" %}
19120   ins_encode %{
19121     uint vlen = Matcher::vector_length(this);
19122     int vlen_enc = vector_length_encoding(this);
19123     if (UseAVX >= 2) {
19124       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19125         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
19126         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
19127       } else {
19128         __ movdl($dst$$XMMRegister, $src$$Register);
19129         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19130       }
19131     } else {
19132       assert(UseAVX < 2, "");
19133       __ movdl($dst$$XMMRegister, $src$$Register);
19134       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19135       if (vlen >= 8) {
19136         assert(vlen == 8, "");
19137         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19138       }
19139     }
19140   %}
19141   ins_pipe( pipe_slow );
19142 %}
19143 
19144 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
19145   match(Set dst (Replicate con));
19146   effect(TEMP rtmp);
19147   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
19148   ins_encode %{
19149     int vlen_enc = vector_length_encoding(this);
19150     BasicType bt = Matcher::vector_element_basic_type(this);
19151     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
19152     __ movl($rtmp$$Register, $con$$constant);
19153     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19154   %}
19155   ins_pipe( pipe_slow );
19156 %}
19157 
19158 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
19159   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
19160   match(Set dst (Replicate src));
19161   effect(TEMP rtmp);
19162   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
19163   ins_encode %{
19164     int vlen_enc = vector_length_encoding(this);
19165     __ evmovw($rtmp$$Register, $src$$XMMRegister);
19166     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19167   %}
19168   ins_pipe( pipe_slow );
19169 %}
19170 
19171 instruct ReplS_mem(vec dst, memory mem) %{
19172   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19173   match(Set dst (Replicate (LoadS mem)));
19174   format %{ "replicateS $dst,$mem" %}
19175   ins_encode %{
19176     int vlen_enc = vector_length_encoding(this);
19177     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19178   %}
19179   ins_pipe( pipe_slow );
19180 %}
19181 
19182 // ====================ReplicateI=======================================
19183 
19184 instruct ReplI_reg(vec dst, rRegI src) %{
19185   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19186   match(Set dst (Replicate src));
19187   format %{ "replicateI $dst,$src" %}
19188   ins_encode %{
19189     uint vlen = Matcher::vector_length(this);
19190     int vlen_enc = vector_length_encoding(this);
19191     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19192       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19193     } else if (VM_Version::supports_avx2()) {
19194       __ movdl($dst$$XMMRegister, $src$$Register);
19195       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19196     } else {
19197       __ movdl($dst$$XMMRegister, $src$$Register);
19198       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19199     }
19200   %}
19201   ins_pipe( pipe_slow );
19202 %}
19203 
19204 instruct ReplI_mem(vec dst, memory mem) %{
19205   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19206   match(Set dst (Replicate (LoadI mem)));
19207   format %{ "replicateI $dst,$mem" %}
19208   ins_encode %{
19209     int vlen_enc = vector_length_encoding(this);
19210     if (VM_Version::supports_avx2()) {
19211       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19212     } else if (VM_Version::supports_avx()) {
19213       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19214     } else {
19215       __ movdl($dst$$XMMRegister, $mem$$Address);
19216       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19217     }
19218   %}
19219   ins_pipe( pipe_slow );
19220 %}
19221 
19222 instruct ReplI_imm(vec dst, immI con) %{
19223   predicate(Matcher::is_non_long_integral_vector(n));
19224   match(Set dst (Replicate con));
19225   format %{ "replicateI $dst,$con" %}
19226   ins_encode %{
19227     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19228                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19229                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
19230     BasicType bt = Matcher::vector_element_basic_type(this);
19231     int vlen = Matcher::vector_length_in_bytes(this);
19232     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19233   %}
19234   ins_pipe( pipe_slow );
19235 %}
19236 
19237 // Replicate scalar zero to be vector
19238 instruct ReplI_zero(vec dst, immI_0 zero) %{
19239   predicate(Matcher::is_non_long_integral_vector(n));
19240   match(Set dst (Replicate zero));
19241   format %{ "replicateI $dst,$zero" %}
19242   ins_encode %{
19243     int vlen_enc = vector_length_encoding(this);
19244     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19245       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19246     } else {
19247       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19248     }
19249   %}
19250   ins_pipe( fpu_reg_reg );
19251 %}
19252 
19253 instruct ReplI_M1(vec dst, immI_M1 con) %{
19254   predicate(Matcher::is_non_long_integral_vector(n));
19255   match(Set dst (Replicate con));
19256   format %{ "vallones $dst" %}
19257   ins_encode %{
19258     int vector_len = vector_length_encoding(this);
19259     __ vallones($dst$$XMMRegister, vector_len);
19260   %}
19261   ins_pipe( pipe_slow );
19262 %}
19263 
19264 // ====================ReplicateL=======================================
19265 
19266 // Replicate long (8 byte) scalar to be vector
19267 instruct ReplL_reg(vec dst, rRegL src) %{
19268   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19269   match(Set dst (Replicate src));
19270   format %{ "replicateL $dst,$src" %}
19271   ins_encode %{
19272     int vlen = Matcher::vector_length(this);
19273     int vlen_enc = vector_length_encoding(this);
19274     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19275       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19276     } else if (VM_Version::supports_avx2()) {
19277       __ movdq($dst$$XMMRegister, $src$$Register);
19278       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19279     } else {
19280       __ movdq($dst$$XMMRegister, $src$$Register);
19281       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19282     }
19283   %}
19284   ins_pipe( pipe_slow );
19285 %}
19286 
19287 instruct ReplL_mem(vec dst, memory mem) %{
19288   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19289   match(Set dst (Replicate (LoadL mem)));
19290   format %{ "replicateL $dst,$mem" %}
19291   ins_encode %{
19292     int vlen_enc = vector_length_encoding(this);
19293     if (VM_Version::supports_avx2()) {
19294       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19295     } else if (VM_Version::supports_sse3()) {
19296       __ movddup($dst$$XMMRegister, $mem$$Address);
19297     } else {
19298       __ movq($dst$$XMMRegister, $mem$$Address);
19299       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19300     }
19301   %}
19302   ins_pipe( pipe_slow );
19303 %}
19304 
19305 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
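// The replication-count argument to vreplicate_imm (here and in the other
// Repl*_imm rules) appears to size the constant-table entry for the load
// strategy used by load_constant_vector: enough copies to fill 4 bytes when an
// AVX broadcast can splat the entry, 8 bytes when SSE3 movddup is available,
// and a full 16 bytes otherwise.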
19306 instruct ReplL_imm(vec dst, immL con) %{
19307   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19308   match(Set dst (Replicate con));
19309   format %{ "replicateL $dst,$con" %}
19310   ins_encode %{
19311     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19312     int vlen = Matcher::vector_length_in_bytes(this);
19313     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19314   %}
19315   ins_pipe( pipe_slow );
19316 %}
19317 
19318 instruct ReplL_zero(vec dst, immL0 zero) %{
19319   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19320   match(Set dst (Replicate zero));
19321   format %{ "replicateL $dst,$zero" %}
19322   ins_encode %{
19323     int vlen_enc = vector_length_encoding(this);
19324     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19325       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19326     } else {
19327       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19328     }
19329   %}
19330   ins_pipe( fpu_reg_reg );
19331 %}
19332 
19333 instruct ReplL_M1(vec dst, immL_M1 con) %{
19334   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19335   match(Set dst (Replicate con));
19336   format %{ "vallones $dst" %}
19337   ins_encode %{
19338     int vector_len = vector_length_encoding(this);
19339     __ vallones($dst$$XMMRegister, vector_len);
19340   %}
19341   ins_pipe( pipe_slow );
19342 %}
19343 
19344 // ====================ReplicateF=======================================
19345 
19346 instruct vReplF_reg(vec dst, vlRegF src) %{
19347   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19348   match(Set dst (Replicate src));
19349   format %{ "replicateF $dst,$src" %}
19350   ins_encode %{
19351     uint vlen = Matcher::vector_length(this);
19352     int vlen_enc = vector_length_encoding(this);
19353     if (vlen <= 4) {
19354       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19355     } else if (VM_Version::supports_avx2()) {
19356       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19357     } else {
19358       assert(vlen == 8, "sanity");
19359       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19360       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19361     }
19362   %}
19363   ins_pipe( pipe_slow );
19364 %}
19365 
19366 instruct ReplF_reg(vec dst, vlRegF src) %{
19367   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19368   match(Set dst (Replicate src));
19369   format %{ "replicateF $dst,$src" %}
19370   ins_encode %{
19371     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19372   %}
19373   ins_pipe( pipe_slow );
19374 %}
19375 
19376 instruct ReplF_mem(vec dst, memory mem) %{
19377   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19378   match(Set dst (Replicate (LoadF mem)));
19379   format %{ "replicateF $dst,$mem" %}
19380   ins_encode %{
19381     int vlen_enc = vector_length_encoding(this);
19382     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19383   %}
19384   ins_pipe( pipe_slow );
19385 %}
19386 
19387 // Replicate float scalar immediate to be vector by loading from const table.
19388 instruct ReplF_imm(vec dst, immF con) %{
19389   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19390   match(Set dst (Replicate con));
19391   format %{ "replicateF $dst,$con" %}
19392   ins_encode %{
19393     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19394                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19395     int vlen = Matcher::vector_length_in_bytes(this);
19396     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19397   %}
19398   ins_pipe( pipe_slow );
19399 %}
19400 
19401 instruct ReplF_zero(vec dst, immF0 zero) %{
19402   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19403   match(Set dst (Replicate zero));
19404   format %{ "replicateF $dst,$zero" %}
19405   ins_encode %{
19406     int vlen_enc = vector_length_encoding(this);
19407     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19408       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19409     } else {
19410       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19411     }
19412   %}
19413   ins_pipe( fpu_reg_reg );
19414 %}
19415 
19416 // ====================ReplicateD=======================================
19417 
19418 // Replicate double (8 bytes) scalar to be vector
19419 instruct vReplD_reg(vec dst, vlRegD src) %{
19420   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19421   match(Set dst (Replicate src));
19422   format %{ "replicateD $dst,$src" %}
19423   ins_encode %{
19424     uint vlen = Matcher::vector_length(this);
19425     int vlen_enc = vector_length_encoding(this);
19426     if (vlen <= 2) {
19427       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19428     } else if (VM_Version::supports_avx2()) {
19429       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19430     } else {
19431       assert(vlen == 4, "sanity");
19432       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19433       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19434     }
19435   %}
19436   ins_pipe( pipe_slow );
19437 %}
19438 
19439 instruct ReplD_reg(vec dst, vlRegD src) %{
19440   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19441   match(Set dst (Replicate src));
19442   format %{ "replicateD $dst,$src" %}
19443   ins_encode %{
19444     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19445   %}
19446   ins_pipe( pipe_slow );
19447 %}
19448 
19449 instruct ReplD_mem(vec dst, memory mem) %{
19450   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19451   match(Set dst (Replicate (LoadD mem)));
19452   format %{ "replicateD $dst,$mem" %}
19453   ins_encode %{
19454     if (Matcher::vector_length(this) >= 4) {
19455       int vlen_enc = vector_length_encoding(this);
19456       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19457     } else {
19458       __ movddup($dst$$XMMRegister, $mem$$Address);
19459     }
19460   %}
19461   ins_pipe( pipe_slow );
19462 %}
19463 
19464 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19465 instruct ReplD_imm(vec dst, immD con) %{
19466   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19467   match(Set dst (Replicate con));
19468   format %{ "replicateD $dst,$con" %}
19469   ins_encode %{
19470     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19471     int vlen = Matcher::vector_length_in_bytes(this);
19472     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19473   %}
19474   ins_pipe( pipe_slow );
19475 %}
19476 
19477 instruct ReplD_zero(vec dst, immD0 zero) %{
19478   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19479   match(Set dst (Replicate zero));
19480   format %{ "replicateD $dst,$zero" %}
19481   ins_encode %{
19482     int vlen_enc = vector_length_encoding(this);
19483     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19484       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19485     } else {
19486       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19487     }
19488   %}
19489   ins_pipe( fpu_reg_reg );
19490 %}
19491 
19492 // ====================VECTOR INSERT=======================================
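// Inserting into a 256/512-bit vector has no single-instruction form, so the
// wide rules below extract the 128-bit lane holding the element, perform the
// scalar insert there, and write the lane back with vinserti128/vinserti32x4.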
19493 
19494 instruct insert(vec dst, rRegI val, immU8 idx) %{
19495   predicate(Matcher::vector_length_in_bytes(n) < 32);
19496   match(Set dst (VectorInsert (Binary dst val) idx));
19497   format %{ "vector_insert $dst,$val,$idx" %}
19498   ins_encode %{
19499     assert(UseSSE >= 4, "required");
19500     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19501 
19502     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19503 
19504     assert(is_integral_type(elem_bt), "");
19505     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19506 
19507     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19508   %}
19509   ins_pipe( pipe_slow );
19510 %}
19511 
19512 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19513   predicate(Matcher::vector_length_in_bytes(n) == 32);
19514   match(Set dst (VectorInsert (Binary src val) idx));
19515   effect(TEMP vtmp);
19516   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19517   ins_encode %{
19518     int vlen_enc = Assembler::AVX_256bit;
19519     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19520     int elem_per_lane = 16/type2aelembytes(elem_bt);
19521     int log2epr = log2(elem_per_lane);
19522 
19523     assert(is_integral_type(elem_bt), "sanity");
19524     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19525 
19526     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19527     uint y_idx = ($idx$$constant >> log2epr) & 1;
19528     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19529     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19530     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19531   %}
19532   ins_pipe( pipe_slow );
19533 %}
19534 
19535 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19536   predicate(Matcher::vector_length_in_bytes(n) == 64);
19537   match(Set dst (VectorInsert (Binary src val) idx));
19538   effect(TEMP vtmp);
19539   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19540   ins_encode %{
19541     assert(UseAVX > 2, "sanity");
19542 
19543     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19544     int elem_per_lane = 16/type2aelembytes(elem_bt);
19545     int log2epr = log2(elem_per_lane);
19546 
19547     assert(is_integral_type(elem_bt), "");
19548     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19549 
19550     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19551     uint y_idx = ($idx$$constant >> log2epr) & 3;
19552     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19553     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19554     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19555   %}
19556   ins_pipe( pipe_slow );
19557 %}
19558 
19559 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19560   predicate(Matcher::vector_length(n) == 2);
19561   match(Set dst (VectorInsert (Binary dst val) idx));
19562   format %{ "vector_insert $dst,$val,$idx" %}
19563   ins_encode %{
19564     assert(UseSSE >= 4, "required");
19565     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19566     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19567 
19568     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19569   %}
19570   ins_pipe( pipe_slow );
19571 %}
19572 
19573 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19574   predicate(Matcher::vector_length(n) == 4);
19575   match(Set dst (VectorInsert (Binary src val) idx));
19576   effect(TEMP vtmp);
19577   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19578   ins_encode %{
19579     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19580     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19581 
19582     uint x_idx = $idx$$constant & right_n_bits(1);
19583     uint y_idx = ($idx$$constant >> 1) & 1;
19584     int vlen_enc = Assembler::AVX_256bit;
19585     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19586     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19587     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19588   %}
19589   ins_pipe( pipe_slow );
19590 %}
19591 
19592 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19593   predicate(Matcher::vector_length(n) == 8);
19594   match(Set dst (VectorInsert (Binary src val) idx));
19595   effect(TEMP vtmp);
19596   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19597   ins_encode %{
19598     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19599     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19600 
19601     uint x_idx = $idx$$constant & right_n_bits(1);
19602     uint y_idx = ($idx$$constant >> 1) & 3;
19603     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19604     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19605     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19606   %}
19607   ins_pipe( pipe_slow );
19608 %}
19609 
19610 instruct insertF(vec dst, regF val, immU8 idx) %{
19611   predicate(Matcher::vector_length(n) < 8);
19612   match(Set dst (VectorInsert (Binary dst val) idx));
19613   format %{ "vector_insert $dst,$val,$idx" %}
19614   ins_encode %{
19615     assert(UseSSE >= 4, "sanity");
19616 
19617     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19618     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19619 
19620     uint x_idx = $idx$$constant & right_n_bits(2);
19621     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19622   %}
19623   ins_pipe( pipe_slow );
19624 %}
19625 
19626 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19627   predicate(Matcher::vector_length(n) >= 8);
19628   match(Set dst (VectorInsert (Binary src val) idx));
19629   effect(TEMP vtmp);
19630   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19631   ins_encode %{
19632     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19633     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19634 
19635     int vlen = Matcher::vector_length(this);
19636     uint x_idx = $idx$$constant & right_n_bits(2);
19637     if (vlen == 8) {
19638       uint y_idx = ($idx$$constant >> 2) & 1;
19640       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19641       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19642       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19643     } else {
19644       assert(vlen == 16, "sanity");
19645       uint y_idx = ($idx$$constant >> 2) & 3;
19646       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19647       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19648       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19649     }
19650   %}
19651   ins_pipe( pipe_slow );
19652 %}
19653 
19654 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19655   predicate(Matcher::vector_length(n) == 2);
19656   match(Set dst (VectorInsert (Binary dst val) idx));
19657   effect(TEMP tmp);
19658   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19659   ins_encode %{
19660     assert(UseSSE >= 4, "sanity");
19661     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19662     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19663 
19664     __ movq($tmp$$Register, $val$$XMMRegister);
19665     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19666   %}
19667   ins_pipe( pipe_slow );
19668 %}
19669 
19670 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19671   predicate(Matcher::vector_length(n) == 4);
19672   match(Set dst (VectorInsert (Binary src val) idx));
19673   effect(TEMP vtmp, TEMP tmp);
19674   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19675   ins_encode %{
19676     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19677     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19678 
19679     uint x_idx = $idx$$constant & right_n_bits(1);
19680     uint y_idx = ($idx$$constant >> 1) & 1;
19682     __ movq($tmp$$Register, $val$$XMMRegister);
19683     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19684     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19685     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19686   %}
19687   ins_pipe( pipe_slow );
19688 %}
19689 
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19691   predicate(Matcher::vector_length(n) == 8);
19692   match(Set dst (VectorInsert (Binary src val) idx));
19693   effect(TEMP tmp, TEMP vtmp);
19694   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19695   ins_encode %{
19696     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19697     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19698 
19699     uint x_idx = $idx$$constant & right_n_bits(1);
19700     uint y_idx = ($idx$$constant >> 1) & 3;
19701     __ movq($tmp$$Register, $val$$XMMRegister);
19702     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19703     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19704     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19705   %}
19706   ins_pipe( pipe_slow );
19707 %}
19708 
19709 // ====================REDUCTION ARITHMETIC=======================================
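// Each reduction folds all lanes of the vector input into one scalar and combines it
// with the scalar input (src1, or dst for the strictly-ordered FP rules).  Roughly the
// scalar equivalent is:
//   acc = src1;
//   for (uint i = 0; i < vlen; i++) { acc = op(acc, src2[i]); }
//   dst = acc;
// The lane shuffling itself is emitted by macro-assembler helpers (reduceI, reduceL,
// reduceB, reduceS, reduce_fp, ...).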
19710 
19711 // =======================Int Reduction==========================================
19712 
19713 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19714   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19715   match(Set dst (AddReductionVI src1 src2));
19716   match(Set dst (MulReductionVI src1 src2));
19717   match(Set dst (AndReductionV  src1 src2));
19718   match(Set dst ( OrReductionV  src1 src2));
19719   match(Set dst (XorReductionV  src1 src2));
19720   match(Set dst (MinReductionV  src1 src2));
19721   match(Set dst (MaxReductionV  src1 src2));
19722   match(Set dst (UMinReductionV  src1 src2));
19723   match(Set dst (UMaxReductionV  src1 src2));
19724   effect(TEMP vtmp1, TEMP vtmp2);
19725   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19726   ins_encode %{
19727     int opcode = this->ideal_Opcode();
19728     int vlen = Matcher::vector_length(this, $src2);
19729     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19730   %}
19731   ins_pipe( pipe_slow );
19732 %}
19733 
19734 // =======================Long Reduction==========================================
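// The AVX512DQ rule can use any vector register (vec); the fallback is restricted to
// legacy-encodable registers (legVec).  AVX512DQ supplies 64-bit lane instructions such
// as vpmullq (needed for MulReductionVL).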
19735 
19736 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19737   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19738   match(Set dst (AddReductionVL src1 src2));
19739   match(Set dst (MulReductionVL src1 src2));
19740   match(Set dst (AndReductionV  src1 src2));
19741   match(Set dst ( OrReductionV  src1 src2));
19742   match(Set dst (XorReductionV  src1 src2));
19743   match(Set dst (MinReductionV  src1 src2));
19744   match(Set dst (MaxReductionV  src1 src2));
19745   match(Set dst (UMinReductionV  src1 src2));
19746   match(Set dst (UMaxReductionV  src1 src2));
19747   effect(TEMP vtmp1, TEMP vtmp2);
19748   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19749   ins_encode %{
19750     int opcode = this->ideal_Opcode();
19751     int vlen = Matcher::vector_length(this, $src2);
19752     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19753   %}
19754   ins_pipe( pipe_slow );
19755 %}
19756 
19757 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19758   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19759   match(Set dst (AddReductionVL src1 src2));
19760   match(Set dst (MulReductionVL src1 src2));
19761   match(Set dst (AndReductionV  src1 src2));
19762   match(Set dst ( OrReductionV  src1 src2));
19763   match(Set dst (XorReductionV  src1 src2));
19764   match(Set dst (MinReductionV  src1 src2));
19765   match(Set dst (MaxReductionV  src1 src2));
19766   match(Set dst (UMinReductionV  src1 src2));
19767   match(Set dst (UMaxReductionV  src1 src2));
19768   effect(TEMP vtmp1, TEMP vtmp2);
19769   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19770   ins_encode %{
19771     int opcode = this->ideal_Opcode();
19772     int vlen = Matcher::vector_length(this, $src2);
19773     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19774   %}
19775   ins_pipe( pipe_slow );
19776 %}
19777 
19778 // =======================Float Reduction==========================================
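// Two families below.  The strictly-ordered rules (requires_strict_order()) accumulate
// into dst lane by lane, preserving the evaluation order needed when the reduction comes
// from an auto-vectorized Java loop.  The unordered_reduction* rules may reassociate the
// adds/multiplies; they serve the Vector API (e.g. FloatVector::reduceLanes), which
// permits a non-strict order, and take the reduction identity in src1.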
19779 
19780 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19781   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19782   match(Set dst (AddReductionVF dst src));
19783   match(Set dst (MulReductionVF dst src));
19784   effect(TEMP dst, TEMP vtmp);
19785   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19786   ins_encode %{
19787     int opcode = this->ideal_Opcode();
19788     int vlen = Matcher::vector_length(this, $src);
19789     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19790   %}
19791   ins_pipe( pipe_slow );
19792 %}
19793 
19794 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19795   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19796   match(Set dst (AddReductionVF dst src));
19797   match(Set dst (MulReductionVF dst src));
19798   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19799   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19800   ins_encode %{
19801     int opcode = this->ideal_Opcode();
19802     int vlen = Matcher::vector_length(this, $src);
19803     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19804   %}
19805   ins_pipe( pipe_slow );
19806 %}
19807 
19808 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19809   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19810   match(Set dst (AddReductionVF dst src));
19811   match(Set dst (MulReductionVF dst src));
19812   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19813   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19814   ins_encode %{
19815     int opcode = this->ideal_Opcode();
19816     int vlen = Matcher::vector_length(this, $src);
19817     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19818   %}
19819   ins_pipe( pipe_slow );
19820 %}
19821 
19822 
19823 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19824   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19825   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19826   // src1 contains reduction identity
19827   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19828   match(Set dst (AddReductionVF src1 src2));
19829   match(Set dst (MulReductionVF src1 src2));
19830   effect(TEMP dst);
19831   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19832   ins_encode %{
19833     int opcode = this->ideal_Opcode();
19834     int vlen = Matcher::vector_length(this, $src2);
19835     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19836   %}
19837   ins_pipe( pipe_slow );
19838 %}
19839 
19840 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19841   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19842   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19843   // src1 contains reduction identity
19844   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19845   match(Set dst (AddReductionVF src1 src2));
19846   match(Set dst (MulReductionVF src1 src2));
19847   effect(TEMP dst, TEMP vtmp);
19848   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19849   ins_encode %{
19850     int opcode = this->ideal_Opcode();
19851     int vlen = Matcher::vector_length(this, $src2);
19852     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19853   %}
19854   ins_pipe( pipe_slow );
19855 %}
19856 
19857 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19858   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19859   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19860   // src1 contains reduction identity
19861   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19862   match(Set dst (AddReductionVF src1 src2));
19863   match(Set dst (MulReductionVF src1 src2));
19864   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19865   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19866   ins_encode %{
19867     int opcode = this->ideal_Opcode();
19868     int vlen = Matcher::vector_length(this, $src2);
19869     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19870   %}
19871   ins_pipe( pipe_slow );
19872 %}
19873 
19874 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19875   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19876   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19877   // src1 contains reduction identity
19878   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19879   match(Set dst (AddReductionVF src1 src2));
19880   match(Set dst (MulReductionVF src1 src2));
19881   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19882   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19883   ins_encode %{
19884     int opcode = this->ideal_Opcode();
19885     int vlen = Matcher::vector_length(this, $src2);
19886     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19887   %}
19888   ins_pipe( pipe_slow );
19889 %}
19890 
19891 // =======================Double Reduction==========================================
19892 
19893 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19894   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19895   match(Set dst (AddReductionVD dst src));
19896   match(Set dst (MulReductionVD dst src));
19897   effect(TEMP dst, TEMP vtmp);
19898   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19899   ins_encode %{
19900     int opcode = this->ideal_Opcode();
19901     int vlen = Matcher::vector_length(this, $src);
19902     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19903 %}
19904   ins_pipe( pipe_slow );
19905 %}
19906 
19907 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19908   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19909   match(Set dst (AddReductionVD dst src));
19910   match(Set dst (MulReductionVD dst src));
19911   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19912   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19913   ins_encode %{
19914     int opcode = this->ideal_Opcode();
19915     int vlen = Matcher::vector_length(this, $src);
19916     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19917   %}
19918   ins_pipe( pipe_slow );
19919 %}
19920 
19921 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19922   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19923   match(Set dst (AddReductionVD dst src));
19924   match(Set dst (MulReductionVD dst src));
19925   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19926   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19927   ins_encode %{
19928     int opcode = this->ideal_Opcode();
19929     int vlen = Matcher::vector_length(this, $src);
19930     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19931   %}
19932   ins_pipe( pipe_slow );
19933 %}
19934 
19935 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19936   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19937   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19938   // src1 contains reduction identity
19939   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19940   match(Set dst (AddReductionVD src1 src2));
19941   match(Set dst (MulReductionVD src1 src2));
19942   effect(TEMP dst);
19943   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19944   ins_encode %{
19945     int opcode = this->ideal_Opcode();
19946     int vlen = Matcher::vector_length(this, $src2);
19947     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19948 %}
19949   ins_pipe( pipe_slow );
19950 %}
19951 
19952 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19953   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19954   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19955   // src1 contains reduction identity
19956   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19957   match(Set dst (AddReductionVD src1 src2));
19958   match(Set dst (MulReductionVD src1 src2));
19959   effect(TEMP dst, TEMP vtmp);
19960   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19961   ins_encode %{
19962     int opcode = this->ideal_Opcode();
19963     int vlen = Matcher::vector_length(this, $src2);
19964     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19965   %}
19966   ins_pipe( pipe_slow );
19967 %}
19968 
19969 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19970   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19971   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19972   // src1 contains reduction identity
19973   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19974   match(Set dst (AddReductionVD src1 src2));
19975   match(Set dst (MulReductionVD src1 src2));
19976   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19977   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19978   ins_encode %{
19979     int opcode = this->ideal_Opcode();
19980     int vlen = Matcher::vector_length(this, $src2);
19981     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19982   %}
19983   ins_pipe( pipe_slow );
19984 %}
19985 
19986 // =======================Byte Reduction==========================================
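// With AVX512BW the byte/word instructions are available in EVEX form on all vector
// registers, so the second rule can use plain vec operands; without it the first rule
// is restricted to legacy-encodable registers (legVec).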
19987 
19988 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19989   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19990   match(Set dst (AddReductionVI src1 src2));
19991   match(Set dst (AndReductionV  src1 src2));
19992   match(Set dst ( OrReductionV  src1 src2));
19993   match(Set dst (XorReductionV  src1 src2));
19994   match(Set dst (MinReductionV  src1 src2));
19995   match(Set dst (MaxReductionV  src1 src2));
19996   match(Set dst (UMinReductionV  src1 src2));
19997   match(Set dst (UMaxReductionV  src1 src2));
19998   effect(TEMP vtmp1, TEMP vtmp2);
19999   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20000   ins_encode %{
20001     int opcode = this->ideal_Opcode();
20002     int vlen = Matcher::vector_length(this, $src2);
20003     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20004   %}
20005   ins_pipe( pipe_slow );
20006 %}
20007 
20008 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20009   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
20010   match(Set dst (AddReductionVI src1 src2));
20011   match(Set dst (AndReductionV  src1 src2));
20012   match(Set dst ( OrReductionV  src1 src2));
20013   match(Set dst (XorReductionV  src1 src2));
20014   match(Set dst (MinReductionV  src1 src2));
20015   match(Set dst (MaxReductionV  src1 src2));
20016   match(Set dst (UMinReductionV  src1 src2));
20017   match(Set dst (UMaxReductionV  src1 src2));
20018   effect(TEMP vtmp1, TEMP vtmp2);
20019   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20020   ins_encode %{
20021     int opcode = this->ideal_Opcode();
20022     int vlen = Matcher::vector_length(this, $src2);
20023     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20024   %}
20025   ins_pipe( pipe_slow );
20026 %}
20027 
20028 // =======================Short Reduction==========================================
20029 
20030 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20031   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
20032   match(Set dst (AddReductionVI src1 src2));
20033   match(Set dst (MulReductionVI src1 src2));
20034   match(Set dst (AndReductionV  src1 src2));
20035   match(Set dst ( OrReductionV  src1 src2));
20036   match(Set dst (XorReductionV  src1 src2));
20037   match(Set dst (MinReductionV  src1 src2));
20038   match(Set dst (MaxReductionV  src1 src2));
20039   match(Set dst (UMinReductionV  src1 src2));
20040   match(Set dst (UMaxReductionV  src1 src2));
20041   effect(TEMP vtmp1, TEMP vtmp2);
20042   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20043   ins_encode %{
20044     int opcode = this->ideal_Opcode();
20045     int vlen = Matcher::vector_length(this, $src2);
20046     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20047   %}
20048   ins_pipe( pipe_slow );
20049 %}
20050 
20051 // =======================Mul Reduction==========================================
20052 
20053 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20054   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20055             Matcher::vector_length(n->in(2)) <= 32); // src2
20056   match(Set dst (MulReductionVI src1 src2));
20057   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20058   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20059   ins_encode %{
20060     int opcode = this->ideal_Opcode();
20061     int vlen = Matcher::vector_length(this, $src2);
20062     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20063   %}
20064   ins_pipe( pipe_slow );
20065 %}
20066 
20067 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20068   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20069             Matcher::vector_length(n->in(2)) == 64); // src2
20070   match(Set dst (MulReductionVI src1 src2));
20071   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
20072   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
20073   ins_encode %{
20074     int opcode = this->ideal_Opcode();
20075     int vlen = Matcher::vector_length(this, $src2);
20076     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20077   %}
20078   ins_pipe( pipe_slow );
20079 %}
20080 
20081 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
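// Java min/max semantics for floating point (NaN propagates, -0.0 compares below +0.0)
// rule out a plain minps/maxps tree, so the pre-AVX10.2 rules below need several vector
// temporaries and clobber the flags.  The immF src1 variants additionally require src1
// to be the operation's identity (+Inf for MinReductionV, -Inf for MaxReductionV, see
// the predicates), so the generated code never has to combine it with the vector result.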
20083 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20084                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20085   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20086             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20087              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20088             Matcher::vector_length(n->in(2)) == 2);
20089   match(Set dst (MinReductionV src1 src2));
20090   match(Set dst (MaxReductionV src1 src2));
20091   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20092   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20093   ins_encode %{
20094     assert(UseAVX > 0, "sanity");
20095 
20096     int opcode = this->ideal_Opcode();
20097     int vlen = Matcher::vector_length(this, $src2);
20098     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20099                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20100   %}
20101   ins_pipe( pipe_slow );
20102 %}
20103 
20104 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20105                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20106   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20107             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20108              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20109             Matcher::vector_length(n->in(2)) >= 4);
20110   match(Set dst (MinReductionV src1 src2));
20111   match(Set dst (MaxReductionV src1 src2));
20112   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20113   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20114   ins_encode %{
20115     assert(UseAVX > 0, "sanity");
20116 
20117     int opcode = this->ideal_Opcode();
20118     int vlen = Matcher::vector_length(this, $src2);
20119     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20120                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20121   %}
20122   ins_pipe( pipe_slow );
20123 %}
20124 
20125 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
20126                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20127   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20128             Matcher::vector_length(n->in(2)) == 2);
20129   match(Set dst (MinReductionV dst src));
20130   match(Set dst (MaxReductionV dst src));
20131   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20132   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20133   ins_encode %{
20134     assert(UseAVX > 0, "sanity");
20135 
20136     int opcode = this->ideal_Opcode();
20137     int vlen = Matcher::vector_length(this, $src);
20138     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20139                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20140   %}
20141   ins_pipe( pipe_slow );
20142 %}
20143 
20144 
20145 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
20146                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20147   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20148             Matcher::vector_length(n->in(2)) >= 4);
20149   match(Set dst (MinReductionV dst src));
20150   match(Set dst (MaxReductionV dst src));
20151   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20152   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20153   ins_encode %{
20154     assert(UseAVX > 0, "sanity");
20155 
20156     int opcode = this->ideal_Opcode();
20157     int vlen = Matcher::vector_length(this, $src);
20158     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20159                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20160   %}
20161   ins_pipe( pipe_slow );
20162 %}
20163 
20164 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
20165   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20166             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20167              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20168             Matcher::vector_length(n->in(2)) == 2);
20169   match(Set dst (MinReductionV src1 src2));
20170   match(Set dst (MaxReductionV src1 src2));
20171   effect(TEMP dst, TEMP xtmp1);
20172   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
20173   ins_encode %{
20174     int opcode = this->ideal_Opcode();
20175     int vlen = Matcher::vector_length(this, $src2);
20176     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20177                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20178   %}
20179   ins_pipe( pipe_slow );
20180 %}
20181 
20182 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
20183   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20184             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20185              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20186             Matcher::vector_length(n->in(2)) >= 4);
20187   match(Set dst (MinReductionV src1 src2));
20188   match(Set dst (MaxReductionV src1 src2));
20189   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20190   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20191   ins_encode %{
20192     int opcode = this->ideal_Opcode();
20193     int vlen = Matcher::vector_length(this, $src2);
20194     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20195                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20196   %}
20197   ins_pipe( pipe_slow );
20198 %}
20199 
20200 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20201   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20202             Matcher::vector_length(n->in(2)) == 2);
20203   match(Set dst (MinReductionV dst src));
20204   match(Set dst (MaxReductionV dst src));
20205   effect(TEMP dst, TEMP xtmp1);
20206   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20207   ins_encode %{
20208     int opcode = this->ideal_Opcode();
20209     int vlen = Matcher::vector_length(this, $src);
20210     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20211                          $xtmp1$$XMMRegister);
20212   %}
20213   ins_pipe( pipe_slow );
20214 %}
20215 
20216 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20217   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20218             Matcher::vector_length(n->in(2)) >= 4);
20219   match(Set dst (MinReductionV dst src));
20220   match(Set dst (MaxReductionV dst src));
20221   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20222   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20223   ins_encode %{
20224     int opcode = this->ideal_Opcode();
20225     int vlen = Matcher::vector_length(this, $src);
20226     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20227                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20228   %}
20229   ins_pipe( pipe_slow );
20230 %}
20231 
//--------------------Min/Max Double Reduction --------------------
20233 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20234                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20235   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20236             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20237              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20238             Matcher::vector_length(n->in(2)) == 2);
20239   match(Set dst (MinReductionV src1 src2));
20240   match(Set dst (MaxReductionV src1 src2));
20241   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20242   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20243   ins_encode %{
20244     assert(UseAVX > 0, "sanity");
20245 
20246     int opcode = this->ideal_Opcode();
20247     int vlen = Matcher::vector_length(this, $src2);
20248     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20249                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20250   %}
20251   ins_pipe( pipe_slow );
20252 %}
20253 
20254 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20255                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20256   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20257             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20258              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20259             Matcher::vector_length(n->in(2)) >= 4);
20260   match(Set dst (MinReductionV src1 src2));
20261   match(Set dst (MaxReductionV src1 src2));
20262   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20263   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20264   ins_encode %{
20265     assert(UseAVX > 0, "sanity");
20266 
20267     int opcode = this->ideal_Opcode();
20268     int vlen = Matcher::vector_length(this, $src2);
20269     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20270                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20271   %}
20272   ins_pipe( pipe_slow );
20273 %}
20274 
20275 
20276 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20277                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20278   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20279             Matcher::vector_length(n->in(2)) == 2);
20280   match(Set dst (MinReductionV dst src));
20281   match(Set dst (MaxReductionV dst src));
20282   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20283   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20284   ins_encode %{
20285     assert(UseAVX > 0, "sanity");
20286 
20287     int opcode = this->ideal_Opcode();
20288     int vlen = Matcher::vector_length(this, $src);
20289     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20290                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20291   %}
20292   ins_pipe( pipe_slow );
20293 %}
20294 
20295 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20296                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20297   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20298             Matcher::vector_length(n->in(2)) >= 4);
20299   match(Set dst (MinReductionV dst src));
20300   match(Set dst (MaxReductionV dst src));
20301   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20302   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20303   ins_encode %{
20304     assert(UseAVX > 0, "sanity");
20305 
20306     int opcode = this->ideal_Opcode();
20307     int vlen = Matcher::vector_length(this, $src);
20308     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20309                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20310   %}
20311   ins_pipe( pipe_slow );
20312 %}
20313 
20314 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20315   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20316             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20317              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20318             Matcher::vector_length(n->in(2)) == 2);
20319   match(Set dst (MinReductionV src1 src2));
20320   match(Set dst (MaxReductionV src1 src2));
20321   effect(TEMP dst, TEMP xtmp1);
20322   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20323   ins_encode %{
20324     int opcode = this->ideal_Opcode();
20325     int vlen = Matcher::vector_length(this, $src2);
20326     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20327                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20328   %}
20329   ins_pipe( pipe_slow );
20330 %}
20331 
20332 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20333   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20334             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20335              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20336             Matcher::vector_length(n->in(2)) >= 4);
20337   match(Set dst (MinReductionV src1 src2));
20338   match(Set dst (MaxReductionV src1 src2));
20339   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20340   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20341   ins_encode %{
20342     int opcode = this->ideal_Opcode();
20343     int vlen = Matcher::vector_length(this, $src2);
20344     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20345                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20346   %}
20347   ins_pipe( pipe_slow );
20348 %}
20349 
20350 
20351 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20352   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20353             Matcher::vector_length(n->in(2)) == 2);
20354   match(Set dst (MinReductionV dst src));
20355   match(Set dst (MaxReductionV dst src));
20356   effect(TEMP dst, TEMP xtmp1);
20357   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20358   ins_encode %{
20359     int opcode = this->ideal_Opcode();
20360     int vlen = Matcher::vector_length(this, $src);
20361     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20362                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20363   %}
20364   ins_pipe( pipe_slow );
20365 %}
20366 
20367 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20368   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20369             Matcher::vector_length(n->in(2)) >= 4);
20370   match(Set dst (MinReductionV dst src));
20371   match(Set dst (MaxReductionV dst src));
20372   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20373   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20374   ins_encode %{
20375     int opcode = this->ideal_Opcode();
20376     int vlen = Matcher::vector_length(this, $src);
20377     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20378                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20379   %}
20380   ins_pipe( pipe_slow );
20381 %}
20382 
20383 // ====================VECTOR ARITHMETIC=======================================
20384 
20385 // --------------------------------- ADD --------------------------------------
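// Most of the element-wise rules that follow come in three flavours: a two-operand SSE
// form (UseAVX == 0, dst op= src), a three-operand AVX register form, and an AVX form
// with a memory operand that is only used for vectors wider than 8 bytes (see the
// vector_length_in_bytes(n->in(1)) > 8 predicate).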
20386 
20387 // Bytes vector add
20388 instruct vaddB(vec dst, vec src) %{
20389   predicate(UseAVX == 0);
20390   match(Set dst (AddVB dst src));
20391   format %{ "paddb   $dst,$src\t! add packedB" %}
20392   ins_encode %{
20393     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20394   %}
20395   ins_pipe( pipe_slow );
20396 %}
20397 
20398 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20399   predicate(UseAVX > 0);
20400   match(Set dst (AddVB src1 src2));
20401   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20402   ins_encode %{
20403     int vlen_enc = vector_length_encoding(this);
20404     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20405   %}
20406   ins_pipe( pipe_slow );
20407 %}
20408 
20409 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20410   predicate((UseAVX > 0) &&
20411             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20412   match(Set dst (AddVB src (LoadVector mem)));
20413   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20414   ins_encode %{
20415     int vlen_enc = vector_length_encoding(this);
20416     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20417   %}
20418   ins_pipe( pipe_slow );
20419 %}
20420 
20421 // Shorts/Chars vector add
20422 instruct vaddS(vec dst, vec src) %{
20423   predicate(UseAVX == 0);
20424   match(Set dst (AddVS dst src));
20425   format %{ "paddw   $dst,$src\t! add packedS" %}
20426   ins_encode %{
20427     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20428   %}
20429   ins_pipe( pipe_slow );
20430 %}
20431 
20432 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20433   predicate(UseAVX > 0);
20434   match(Set dst (AddVS src1 src2));
20435   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20436   ins_encode %{
20437     int vlen_enc = vector_length_encoding(this);
20438     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20439   %}
20440   ins_pipe( pipe_slow );
20441 %}
20442 
20443 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20444   predicate((UseAVX > 0) &&
20445             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20446   match(Set dst (AddVS src (LoadVector mem)));
20447   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20448   ins_encode %{
20449     int vlen_enc = vector_length_encoding(this);
20450     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20451   %}
20452   ins_pipe( pipe_slow );
20453 %}
20454 
20455 // Integers vector add
20456 instruct vaddI(vec dst, vec src) %{
20457   predicate(UseAVX == 0);
20458   match(Set dst (AddVI dst src));
20459   format %{ "paddd   $dst,$src\t! add packedI" %}
20460   ins_encode %{
20461     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20462   %}
20463   ins_pipe( pipe_slow );
20464 %}
20465 
20466 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20467   predicate(UseAVX > 0);
20468   match(Set dst (AddVI src1 src2));
20469   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20470   ins_encode %{
20471     int vlen_enc = vector_length_encoding(this);
20472     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20473   %}
20474   ins_pipe( pipe_slow );
20475 %}
20476 
20477 
20478 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20479   predicate((UseAVX > 0) &&
20480             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20481   match(Set dst (AddVI src (LoadVector mem)));
20482   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20483   ins_encode %{
20484     int vlen_enc = vector_length_encoding(this);
20485     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20486   %}
20487   ins_pipe( pipe_slow );
20488 %}
20489 
20490 // Longs vector add
20491 instruct vaddL(vec dst, vec src) %{
20492   predicate(UseAVX == 0);
20493   match(Set dst (AddVL dst src));
20494   format %{ "paddq   $dst,$src\t! add packedL" %}
20495   ins_encode %{
20496     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20497   %}
20498   ins_pipe( pipe_slow );
20499 %}
20500 
20501 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20502   predicate(UseAVX > 0);
20503   match(Set dst (AddVL src1 src2));
20504   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20505   ins_encode %{
20506     int vlen_enc = vector_length_encoding(this);
20507     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20508   %}
20509   ins_pipe( pipe_slow );
20510 %}
20511 
20512 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20513   predicate((UseAVX > 0) &&
20514             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20515   match(Set dst (AddVL src (LoadVector mem)));
20516   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20517   ins_encode %{
20518     int vlen_enc = vector_length_encoding(this);
20519     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20520   %}
20521   ins_pipe( pipe_slow );
20522 %}
20523 
20524 // Floats vector add
20525 instruct vaddF(vec dst, vec src) %{
20526   predicate(UseAVX == 0);
20527   match(Set dst (AddVF dst src));
20528   format %{ "addps   $dst,$src\t! add packedF" %}
20529   ins_encode %{
20530     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20531   %}
20532   ins_pipe( pipe_slow );
20533 %}
20534 
20535 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20536   predicate(UseAVX > 0);
20537   match(Set dst (AddVF src1 src2));
20538   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20539   ins_encode %{
20540     int vlen_enc = vector_length_encoding(this);
20541     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20542   %}
20543   ins_pipe( pipe_slow );
20544 %}
20545 
20546 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20547   predicate((UseAVX > 0) &&
20548             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20549   match(Set dst (AddVF src (LoadVector mem)));
20550   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20551   ins_encode %{
20552     int vlen_enc = vector_length_encoding(this);
20553     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20554   %}
20555   ins_pipe( pipe_slow );
20556 %}
20557 
20558 // Doubles vector add
20559 instruct vaddD(vec dst, vec src) %{
20560   predicate(UseAVX == 0);
20561   match(Set dst (AddVD dst src));
20562   format %{ "addpd   $dst,$src\t! add packedD" %}
20563   ins_encode %{
20564     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20565   %}
20566   ins_pipe( pipe_slow );
20567 %}
20568 
20569 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20570   predicate(UseAVX > 0);
20571   match(Set dst (AddVD src1 src2));
20572   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20573   ins_encode %{
20574     int vlen_enc = vector_length_encoding(this);
20575     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20576   %}
20577   ins_pipe( pipe_slow );
20578 %}
20579 
20580 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20581   predicate((UseAVX > 0) &&
20582             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20583   match(Set dst (AddVD src (LoadVector mem)));
20584   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20585   ins_encode %{
20586     int vlen_enc = vector_length_encoding(this);
20587     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20588   %}
20589   ins_pipe( pipe_slow );
20590 %}
20591 
20592 // --------------------------------- SUB --------------------------------------
20593 
20594 // Bytes vector sub
20595 instruct vsubB(vec dst, vec src) %{
20596   predicate(UseAVX == 0);
20597   match(Set dst (SubVB dst src));
20598   format %{ "psubb   $dst,$src\t! sub packedB" %}
20599   ins_encode %{
20600     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20601   %}
20602   ins_pipe( pipe_slow );
20603 %}
20604 
20605 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20606   predicate(UseAVX > 0);
20607   match(Set dst (SubVB src1 src2));
20608   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20609   ins_encode %{
20610     int vlen_enc = vector_length_encoding(this);
20611     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20612   %}
20613   ins_pipe( pipe_slow );
20614 %}
20615 
20616 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20617   predicate((UseAVX > 0) &&
20618             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20619   match(Set dst (SubVB src (LoadVector mem)));
20620   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20621   ins_encode %{
20622     int vlen_enc = vector_length_encoding(this);
20623     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20624   %}
20625   ins_pipe( pipe_slow );
20626 %}
20627 
20628 // Shorts/Chars vector sub
20629 instruct vsubS(vec dst, vec src) %{
20630   predicate(UseAVX == 0);
20631   match(Set dst (SubVS dst src));
20632   format %{ "psubw   $dst,$src\t! sub packedS" %}
20633   ins_encode %{
20634     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20635   %}
20636   ins_pipe( pipe_slow );
20637 %}
20638 
20639 
20640 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20641   predicate(UseAVX > 0);
20642   match(Set dst (SubVS src1 src2));
20643   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20644   ins_encode %{
20645     int vlen_enc = vector_length_encoding(this);
20646     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20647   %}
20648   ins_pipe( pipe_slow );
20649 %}
20650 
20651 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20652   predicate((UseAVX > 0) &&
20653             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20654   match(Set dst (SubVS src (LoadVector mem)));
20655   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20656   ins_encode %{
20657     int vlen_enc = vector_length_encoding(this);
20658     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20659   %}
20660   ins_pipe( pipe_slow );
20661 %}
20662 
20663 // Integers vector sub
20664 instruct vsubI(vec dst, vec src) %{
20665   predicate(UseAVX == 0);
20666   match(Set dst (SubVI dst src));
20667   format %{ "psubd   $dst,$src\t! sub packedI" %}
20668   ins_encode %{
20669     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20670   %}
20671   ins_pipe( pipe_slow );
20672 %}
20673 
20674 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20675   predicate(UseAVX > 0);
20676   match(Set dst (SubVI src1 src2));
20677   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20678   ins_encode %{
20679     int vlen_enc = vector_length_encoding(this);
20680     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20681   %}
20682   ins_pipe( pipe_slow );
20683 %}
20684 
20685 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20686   predicate((UseAVX > 0) &&
20687             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20688   match(Set dst (SubVI src (LoadVector mem)));
20689   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20690   ins_encode %{
20691     int vlen_enc = vector_length_encoding(this);
20692     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20693   %}
20694   ins_pipe( pipe_slow );
20695 %}
20696 
20697 // Longs vector sub
20698 instruct vsubL(vec dst, vec src) %{
20699   predicate(UseAVX == 0);
20700   match(Set dst (SubVL dst src));
20701   format %{ "psubq   $dst,$src\t! sub packedL" %}
20702   ins_encode %{
20703     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20704   %}
20705   ins_pipe( pipe_slow );
20706 %}
20707 
20708 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20709   predicate(UseAVX > 0);
20710   match(Set dst (SubVL src1 src2));
20711   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20712   ins_encode %{
20713     int vlen_enc = vector_length_encoding(this);
20714     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20715   %}
20716   ins_pipe( pipe_slow );
20717 %}
20718 
20719 
20720 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20721   predicate((UseAVX > 0) &&
20722             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20723   match(Set dst (SubVL src (LoadVector mem)));
20724   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20725   ins_encode %{
20726     int vlen_enc = vector_length_encoding(this);
20727     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20728   %}
20729   ins_pipe( pipe_slow );
20730 %}
20731 
20732 // Floats vector sub
20733 instruct vsubF(vec dst, vec src) %{
20734   predicate(UseAVX == 0);
20735   match(Set dst (SubVF dst src));
20736   format %{ "subps   $dst,$src\t! sub packedF" %}
20737   ins_encode %{
20738     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20739   %}
20740   ins_pipe( pipe_slow );
20741 %}
20742 
20743 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20744   predicate(UseAVX > 0);
20745   match(Set dst (SubVF src1 src2));
20746   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20747   ins_encode %{
20748     int vlen_enc = vector_length_encoding(this);
20749     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20750   %}
20751   ins_pipe( pipe_slow );
20752 %}
20753 
20754 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20755   predicate((UseAVX > 0) &&
20756             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20757   match(Set dst (SubVF src (LoadVector mem)));
20758   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20759   ins_encode %{
20760     int vlen_enc = vector_length_encoding(this);
20761     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20762   %}
20763   ins_pipe( pipe_slow );
20764 %}
20765 
20766 // Doubles vector sub
20767 instruct vsubD(vec dst, vec src) %{
20768   predicate(UseAVX == 0);
20769   match(Set dst (SubVD dst src));
20770   format %{ "subpd   $dst,$src\t! sub packedD" %}
20771   ins_encode %{
20772     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20773   %}
20774   ins_pipe( pipe_slow );
20775 %}
20776 
20777 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20778   predicate(UseAVX > 0);
20779   match(Set dst (SubVD src1 src2));
20780   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20781   ins_encode %{
20782     int vlen_enc = vector_length_encoding(this);
20783     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20784   %}
20785   ins_pipe( pipe_slow );
20786 %}
20787 
20788 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20789   predicate((UseAVX > 0) &&
20790             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20791   match(Set dst (SubVD src (LoadVector mem)));
20792   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20793   ins_encode %{
20794     int vlen_enc = vector_length_encoding(this);
20795     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20796   %}
20797   ins_pipe( pipe_slow );
20798 %}
20799 
20800 // --------------------------------- MUL --------------------------------------
20801 
20802 // Byte vector mul
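// There is no packed byte multiply on x86, so bytes are multiplied as 16-bit lanes with
// pmullw/vpmullw: the 8-byte rule sign-extends both operands (pmovsxbw), while the wider
// rules handle the odd and even byte lanes separately, truncate each product back to
// 8 bits, and recombine them with por/vpor.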
20803 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20804   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20805   match(Set dst (MulVB src1 src2));
20806   effect(TEMP dst, TEMP xtmp);
20807   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20808   ins_encode %{
20809     assert(UseSSE > 3, "required");
20810     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20811     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20812     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20813     __ psllw($dst$$XMMRegister, 8);
20814     __ psrlw($dst$$XMMRegister, 8);
20815     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20816   %}
20817   ins_pipe( pipe_slow );
20818 %}
20819 
20820 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20821   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20822   match(Set dst (MulVB src1 src2));
20823   effect(TEMP dst, TEMP xtmp);
20824   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20825   ins_encode %{
20826     assert(UseSSE > 3, "required");
20827     // Odd-index elements
20828     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20829     __ psrlw($dst$$XMMRegister, 8);
20830     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20831     __ psrlw($xtmp$$XMMRegister, 8);
20832     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20833     __ psllw($dst$$XMMRegister, 8);
20834     // Even-index elements
20835     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20836     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20837     __ psllw($xtmp$$XMMRegister, 8);
20838     __ psrlw($xtmp$$XMMRegister, 8);
20839     // Combine
20840     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20841   %}
20842   ins_pipe( pipe_slow );
20843 %}
20844 
20845 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20846   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20847   match(Set dst (MulVB src1 src2));
20848   effect(TEMP xtmp1, TEMP xtmp2);
20849   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20850   ins_encode %{
20851     int vlen_enc = vector_length_encoding(this);
20852     // Odd-index elements
20853     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20854     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20855     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20856     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20857     // Even-index elements
20858     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20859     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20860     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20861     // Combine
20862     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20863   %}
20864   ins_pipe( pipe_slow );
20865 %}
20866 
20867 // Shorts/Chars vector mul
20868 instruct vmulS(vec dst, vec src) %{
20869   predicate(UseAVX == 0);
20870   match(Set dst (MulVS dst src));
20871   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20872   ins_encode %{
20873     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20874   %}
20875   ins_pipe( pipe_slow );
20876 %}
20877 
20878 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20879   predicate(UseAVX > 0);
20880   match(Set dst (MulVS src1 src2));
20881   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20882   ins_encode %{
20883     int vlen_enc = vector_length_encoding(this);
20884     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20885   %}
20886   ins_pipe( pipe_slow );
20887 %}
20888 
20889 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20890   predicate((UseAVX > 0) &&
20891             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20892   match(Set dst (MulVS src (LoadVector mem)));
20893   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20894   ins_encode %{
20895     int vlen_enc = vector_length_encoding(this);
20896     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20897   %}
20898   ins_pipe( pipe_slow );
20899 %}
20900 
20901 // Integers vector mul
20902 instruct vmulI(vec dst, vec src) %{
20903   predicate(UseAVX == 0);
20904   match(Set dst (MulVI dst src));
20905   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20906   ins_encode %{
20907     assert(UseSSE > 3, "required");
20908     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20909   %}
20910   ins_pipe( pipe_slow );
20911 %}
20912 
20913 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20914   predicate(UseAVX > 0);
20915   match(Set dst (MulVI src1 src2));
20916   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20917   ins_encode %{
20918     int vlen_enc = vector_length_encoding(this);
20919     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20920   %}
20921   ins_pipe( pipe_slow );
20922 %}
20923 
20924 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20925   predicate((UseAVX > 0) &&
20926             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20927   match(Set dst (MulVI src (LoadVector mem)));
20928   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20929   ins_encode %{
20930     int vlen_enc = vector_length_encoding(this);
20931     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20932   %}
20933   ins_pipe( pipe_slow );
20934 %}
20935 
20936 // Longs vector mul
20937 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20938   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20939              VM_Version::supports_avx512dq()) ||
20940             VM_Version::supports_avx512vldq());
20941   match(Set dst (MulVL src1 src2));
20942   ins_cost(500);
20943   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20944   ins_encode %{
20945     assert(UseAVX > 2, "required");
20946     int vlen_enc = vector_length_encoding(this);
20947     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20948   %}
20949   ins_pipe( pipe_slow );
20950 %}
20951 
20952 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20953   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20954              VM_Version::supports_avx512dq()) ||
20955             (Matcher::vector_length_in_bytes(n) > 8 &&
20956              VM_Version::supports_avx512vldq()));
20957   match(Set dst (MulVL src (LoadVector mem)));
20958   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20959   ins_cost(500);
20960   ins_encode %{
20961     assert(UseAVX > 2, "required");
20962     int vlen_enc = vector_length_encoding(this);
20963     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20964   %}
20965   ins_pipe( pipe_slow );
20966 %}
20967 
20968 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20969   predicate(UseAVX == 0);
20970   match(Set dst (MulVL src1 src2));
20971   ins_cost(500);
20972   effect(TEMP dst, TEMP xtmp);
20973   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20974   ins_encode %{
20975     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only the lower 32 bits are of concern
20977     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20978     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20979     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20980     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20981     __ psllq($dst$$XMMRegister, 32);
20982     // Get the lo-lo products
20983     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20984     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20985     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20986   %}
20987   ins_pipe( pipe_slow );
20988 %}
20989 
20990 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20991   predicate(UseAVX > 0 &&
20992             ((Matcher::vector_length_in_bytes(n) == 64 &&
20993               !VM_Version::supports_avx512dq()) ||
20994              (Matcher::vector_length_in_bytes(n) < 64 &&
20995               !VM_Version::supports_avx512vldq())));
20996   match(Set dst (MulVL src1 src2));
20997   effect(TEMP xtmp1, TEMP xtmp2);
20998   ins_cost(500);
20999   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
21000   ins_encode %{
21001     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only the lower 32 bits are of concern
21003     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
21004     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
21005     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
21006     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
21007     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
21008     // Get the lo-lo products
21009     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21010     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21011   %}
21012   ins_pipe( pipe_slow );
21013 %}
21014 
21015 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
21016   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
21017   match(Set dst (MulVL src1 src2));
21018   ins_cost(100);
21019   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
21020   ins_encode %{
21021     int vlen_enc = vector_length_encoding(this);
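    // Both inputs are known zero-extended 32-bit values (see has_uint_inputs()
    // in the predicate), so one unsigned 32x32->64 multiply yields the product.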
21022     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21023   %}
21024   ins_pipe( pipe_slow );
21025 %}
21026 
21027 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
21028   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
21029   match(Set dst (MulVL src1 src2));
21030   ins_cost(100);
21031   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
21032   ins_encode %{
21033     int vlen_enc = vector_length_encoding(this);
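    // Both inputs are known sign-extended 32-bit values (see has_int_inputs()
    // in the predicate), so one signed 32x32->64 multiply yields the product.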
21034     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21035   %}
21036   ins_pipe( pipe_slow );
21037 %}
21038 
21039 // Floats vector mul
21040 instruct vmulF(vec dst, vec src) %{
21041   predicate(UseAVX == 0);
21042   match(Set dst (MulVF dst src));
21043   format %{ "mulps   $dst,$src\t! mul packedF" %}
21044   ins_encode %{
21045     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
21046   %}
21047   ins_pipe( pipe_slow );
21048 %}
21049 
21050 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
21051   predicate(UseAVX > 0);
21052   match(Set dst (MulVF src1 src2));
21053   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
21054   ins_encode %{
21055     int vlen_enc = vector_length_encoding(this);
21056     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21057   %}
21058   ins_pipe( pipe_slow );
21059 %}
21060 
21061 instruct vmulF_mem(vec dst, vec src, memory mem) %{
21062   predicate((UseAVX > 0) &&
21063             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21064   match(Set dst (MulVF src (LoadVector mem)));
21065   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
21066   ins_encode %{
21067     int vlen_enc = vector_length_encoding(this);
21068     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21069   %}
21070   ins_pipe( pipe_slow );
21071 %}
21072 
21073 // Doubles vector mul
21074 instruct vmulD(vec dst, vec src) %{
21075   predicate(UseAVX == 0);
21076   match(Set dst (MulVD dst src));
21077   format %{ "mulpd   $dst,$src\t! mul packedD" %}
21078   ins_encode %{
21079     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
21080   %}
21081   ins_pipe( pipe_slow );
21082 %}
21083 
21084 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
21085   predicate(UseAVX > 0);
21086   match(Set dst (MulVD src1 src2));
21087   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
21088   ins_encode %{
21089     int vlen_enc = vector_length_encoding(this);
21090     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21091   %}
21092   ins_pipe( pipe_slow );
21093 %}
21094 
21095 instruct vmulD_mem(vec dst, vec src, memory mem) %{
21096   predicate((UseAVX > 0) &&
21097             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21098   match(Set dst (MulVD src (LoadVector mem)));
21099   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
21100   ins_encode %{
21101     int vlen_enc = vector_length_encoding(this);
21102     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21103   %}
21104   ins_pipe( pipe_slow );
21105 %}
21106 
21107 // --------------------------------- DIV --------------------------------------
21108 
21109 // Floats vector div
21110 instruct vdivF(vec dst, vec src) %{
21111   predicate(UseAVX == 0);
21112   match(Set dst (DivVF dst src));
21113   format %{ "divps   $dst,$src\t! div packedF" %}
21114   ins_encode %{
21115     __ divps($dst$$XMMRegister, $src$$XMMRegister);
21116   %}
21117   ins_pipe( pipe_slow );
21118 %}
21119 
21120 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
21121   predicate(UseAVX > 0);
21122   match(Set dst (DivVF src1 src2));
21123   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
21124   ins_encode %{
21125     int vlen_enc = vector_length_encoding(this);
21126     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21127   %}
21128   ins_pipe( pipe_slow );
21129 %}
21130 
21131 instruct vdivF_mem(vec dst, vec src, memory mem) %{
21132   predicate((UseAVX > 0) &&
21133             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21134   match(Set dst (DivVF src (LoadVector mem)));
21135   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
21136   ins_encode %{
21137     int vlen_enc = vector_length_encoding(this);
21138     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21139   %}
21140   ins_pipe( pipe_slow );
21141 %}
21142 
21143 // Doubles vector div
21144 instruct vdivD(vec dst, vec src) %{
21145   predicate(UseAVX == 0);
21146   match(Set dst (DivVD dst src));
21147   format %{ "divpd   $dst,$src\t! div packedD" %}
21148   ins_encode %{
21149     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
21150   %}
21151   ins_pipe( pipe_slow );
21152 %}
21153 
21154 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
21155   predicate(UseAVX > 0);
21156   match(Set dst (DivVD src1 src2));
21157   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
21158   ins_encode %{
21159     int vlen_enc = vector_length_encoding(this);
21160     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21161   %}
21162   ins_pipe( pipe_slow );
21163 %}
21164 
21165 instruct vdivD_mem(vec dst, vec src, memory mem) %{
21166   predicate((UseAVX > 0) &&
21167             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21168   match(Set dst (DivVD src (LoadVector mem)));
21169   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
21170   ins_encode %{
21171     int vlen_enc = vector_length_encoding(this);
21172     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21173   %}
21174   ins_pipe( pipe_slow );
21175 %}
21176 
21177 // ------------------------------ MinMax ---------------------------------------
21178 
21179 // Byte, Short, Int vector Min/Max
21180 instruct minmax_reg_sse(vec dst, vec src) %{
21181   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21182             UseAVX == 0);
21183   match(Set dst (MinV dst src));
21184   match(Set dst (MaxV dst src));
21185   format %{ "vector_minmax  $dst,$src\t!  " %}
21186   ins_encode %{
21187     assert(UseSSE >= 4, "required");
21188 
21189     int opcode = this->ideal_Opcode();
21190     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21191     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21192   %}
21193   ins_pipe( pipe_slow );
21194 %}
21195 
21196 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21197   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21198             UseAVX > 0);
21199   match(Set dst (MinV src1 src2));
21200   match(Set dst (MaxV src1 src2));
21201   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
21202   ins_encode %{
21203     int opcode = this->ideal_Opcode();
21204     int vlen_enc = vector_length_encoding(this);
21205     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21206 
21207     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21208   %}
21209   ins_pipe( pipe_slow );
21210 %}
21211 
21212 // Long vector Min/Max
21213 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21214   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21215             UseAVX == 0);
21216   match(Set dst (MinV dst src));
21217   match(Set dst (MaxV src dst));
21218   effect(TEMP dst, TEMP tmp);
21219   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
21220   ins_encode %{
21221     assert(UseSSE >= 4, "required");
21222 
21223     int opcode = this->ideal_Opcode();
21224     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21225     assert(elem_bt == T_LONG, "sanity");
21226 
21227     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21228   %}
21229   ins_pipe( pipe_slow );
21230 %}
21231 
21232 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21233   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21234             UseAVX > 0 && !VM_Version::supports_avx512vl());
21235   match(Set dst (MinV src1 src2));
21236   match(Set dst (MaxV src1 src2));
21237   effect(TEMP dst);
21238   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
21239   ins_encode %{
21240     int vlen_enc = vector_length_encoding(this);
21241     int opcode = this->ideal_Opcode();
21242     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21243     assert(elem_bt == T_LONG, "sanity");
21244 
21245     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21246   %}
21247   ins_pipe( pipe_slow );
21248 %}
21249 
21250 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21251   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21252             Matcher::vector_element_basic_type(n) == T_LONG);
21253   match(Set dst (MinV src1 src2));
21254   match(Set dst (MaxV src1 src2));
21255   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
21256   ins_encode %{
21257     assert(UseAVX > 2, "required");
21258 
21259     int vlen_enc = vector_length_encoding(this);
21260     int opcode = this->ideal_Opcode();
21261     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21262     assert(elem_bt == T_LONG, "sanity");
21263 
21264     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21265   %}
21266   ins_pipe( pipe_slow );
21267 %}
21268 
21269 // Float/Double vector Min/Max
21270 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21271   predicate(VM_Version::supports_avx10_2() &&
21272             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21273   match(Set dst (MinV a b));
21274   match(Set dst (MaxV a b));
21275   format %{ "vector_minmaxFP  $dst, $a, $b" %}
21276   ins_encode %{
21277     int vlen_enc = vector_length_encoding(this);
21278     int opcode = this->ideal_Opcode();
21279     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21280     __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21281   %}
21282   ins_pipe( pipe_slow );
21283 %}
21284 
21285 // Float/Double vector Min/Max
21286 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21287   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21288             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21289             UseAVX > 0);
21290   match(Set dst (MinV a b));
21291   match(Set dst (MaxV a b));
21292   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21293   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21294   ins_encode %{
21295     assert(UseAVX > 0, "required");
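    // The temporaries are used to implement Java Math.min/max semantics
    // (NaN propagation and -0.0 < +0.0), which plain SIMD min/max do not give.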
21296 
21297     int opcode = this->ideal_Opcode();
21298     int vlen_enc = vector_length_encoding(this);
21299     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21300 
21301     __ vminmax_fp(opcode, elem_bt,
21302                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21303                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21304   %}
21305   ins_pipe( pipe_slow );
21306 %}
21307 
21308 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21309   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21310             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21311   match(Set dst (MinV a b));
21312   match(Set dst (MaxV a b));
21313   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21314   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21315   ins_encode %{
21316     assert(UseAVX > 2, "required");
21317 
21318     int opcode = this->ideal_Opcode();
21319     int vlen_enc = vector_length_encoding(this);
21320     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21321 
21322     __ evminmax_fp(opcode, elem_bt,
21323                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21324                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21325   %}
21326   ins_pipe( pipe_slow );
21327 %}
21328 
21329 // ------------------------------ Unsigned vector Min/Max ----------------------
21330 
21331 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21332   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21333   match(Set dst (UMinV a b));
21334   match(Set dst (UMaxV a b));
21335   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21336   ins_encode %{
21337     int opcode = this->ideal_Opcode();
21338     int vlen_enc = vector_length_encoding(this);
21339     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21340     assert(is_integral_type(elem_bt), "");
21341     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21342   %}
21343   ins_pipe( pipe_slow );
21344 %}
21345 
21346 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21347   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21348   match(Set dst (UMinV a (LoadVector b)));
21349   match(Set dst (UMaxV a (LoadVector b)));
21350   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21351   ins_encode %{
21352     int opcode = this->ideal_Opcode();
21353     int vlen_enc = vector_length_encoding(this);
21354     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21355     assert(is_integral_type(elem_bt), "");
21356     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21357   %}
21358   ins_pipe( pipe_slow );
21359 %}
21360 
21361 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21362   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21363   match(Set dst (UMinV a b));
21364   match(Set dst (UMaxV a b));
21365   effect(TEMP xtmp1, TEMP xtmp2);
21366   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21367   ins_encode %{
21368     int opcode = this->ideal_Opcode();
21369     int vlen_enc = vector_length_encoding(this);
21370     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21371   %}
21372   ins_pipe( pipe_slow );
21373 %}
21374 
21375 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21376   match(Set dst (UMinV (Binary dst src2) mask));
21377   match(Set dst (UMaxV (Binary dst src2) mask));
21378   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21379   ins_encode %{
21380     int vlen_enc = vector_length_encoding(this);
21381     BasicType bt = Matcher::vector_element_basic_type(this);
21382     int opc = this->ideal_Opcode();
21383     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21384                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21385   %}
21386   ins_pipe( pipe_slow );
21387 %}
21388 
21389 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21390   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21391   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21392   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21393   ins_encode %{
21394     int vlen_enc = vector_length_encoding(this);
21395     BasicType bt = Matcher::vector_element_basic_type(this);
21396     int opc = this->ideal_Opcode();
21397     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21398                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21399   %}
21400   ins_pipe( pipe_slow );
21401 %}
21402 
21403 // --------------------------------- Signum/CopySign ---------------------------
21404 
21405 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21406   match(Set dst (SignumF dst (Binary zero one)));
21407   effect(KILL cr);
21408   format %{ "signumF $dst, $dst" %}
21409   ins_encode %{
21410     int opcode = this->ideal_Opcode();
21411     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21412   %}
21413   ins_pipe( pipe_slow );
21414 %}
21415 
21416 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21417   match(Set dst (SignumD dst (Binary zero one)));
21418   effect(KILL cr);
21419   format %{ "signumD $dst, $dst" %}
21420   ins_encode %{
21421     int opcode = this->ideal_Opcode();
21422     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21423   %}
21424   ins_pipe( pipe_slow );
21425 %}
21426 
21427 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21428   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21429   match(Set dst (SignumVF src (Binary zero one)));
21430   match(Set dst (SignumVD src (Binary zero one)));
21431   effect(TEMP dst, TEMP xtmp1);
21432   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21433   ins_encode %{
21434     int opcode = this->ideal_Opcode();
21435     int vec_enc = vector_length_encoding(this);
21436     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21437                          $xtmp1$$XMMRegister, vec_enc);
21438   %}
21439   ins_pipe( pipe_slow );
21440 %}
21441 
21442 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21443   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21444   match(Set dst (SignumVF src (Binary zero one)));
21445   match(Set dst (SignumVD src (Binary zero one)));
21446   effect(TEMP dst, TEMP ktmp1);
21447   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21448   ins_encode %{
21449     int opcode = this->ideal_Opcode();
21450     int vec_enc = vector_length_encoding(this);
21451     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21452                           $ktmp1$$KRegister, vec_enc);
21453   %}
21454   ins_pipe( pipe_slow );
21455 %}
21456 
21457 // ---------------------------------------
// For copySign use 0xE4 as the imm8 truth-table selector for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (the sign operand)
// Wherever xmm2 is 1, we want to pick from A (the magnitude operand)
21463 //
21464 // A B C Result
21465 // 0 0 0 0
21466 // 0 0 1 0
21467 // 0 1 0 1
21468 // 0 1 1 0
21469 // 1 0 0 0
21470 // 1 0 1 1
21471 // 1 1 0 1
21472 // 1 1 1 1
21473 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
21475 // ---------------------------------------
21476 
21477 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21478   match(Set dst (CopySignF dst src));
21479   effect(TEMP tmp1, TEMP tmp2);
21480   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21481   ins_encode %{
21482     __ movl($tmp2$$Register, 0x7FFFFFFF);
21483     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21484     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21485   %}
21486   ins_pipe( pipe_slow );
21487 %}
21488 
21489 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21490   match(Set dst (CopySignD dst (Binary src zero)));
21491   ins_cost(100);
21492   effect(TEMP tmp1, TEMP tmp2);
21493   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21494   ins_encode %{
21495     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21496     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21497     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21498   %}
21499   ins_pipe( pipe_slow );
21500 %}
21501 
21502 //----------------------------- CompressBits/ExpandBits ------------------------
21503 
21504 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21505   predicate(n->bottom_type()->isa_int());
21506   match(Set dst (CompressBits src mask));
21507   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21508   ins_encode %{
21509     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21510   %}
21511   ins_pipe( pipe_slow );
21512 %}
21513 
21514 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21515   predicate(n->bottom_type()->isa_int());
21516   match(Set dst (ExpandBits src mask));
21517   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21518   ins_encode %{
21519     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21520   %}
21521   ins_pipe( pipe_slow );
21522 %}
21523 
21524 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21525   predicate(n->bottom_type()->isa_int());
21526   match(Set dst (CompressBits src (LoadI mask)));
21527   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21528   ins_encode %{
21529     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21530   %}
21531   ins_pipe( pipe_slow );
21532 %}
21533 
21534 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21535   predicate(n->bottom_type()->isa_int());
21536   match(Set dst (ExpandBits src (LoadI mask)));
21537   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21538   ins_encode %{
21539     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21540   %}
21541   ins_pipe( pipe_slow );
21542 %}
21543 
21544 // --------------------------------- Sqrt --------------------------------------
21545 
21546 instruct vsqrtF_reg(vec dst, vec src) %{
21547   match(Set dst (SqrtVF src));
21548   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21549   ins_encode %{
21550     assert(UseAVX > 0, "required");
21551     int vlen_enc = vector_length_encoding(this);
21552     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21553   %}
21554   ins_pipe( pipe_slow );
21555 %}
21556 
21557 instruct vsqrtF_mem(vec dst, memory mem) %{
21558   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21559   match(Set dst (SqrtVF (LoadVector mem)));
21560   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21561   ins_encode %{
21562     assert(UseAVX > 0, "required");
21563     int vlen_enc = vector_length_encoding(this);
21564     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21565   %}
21566   ins_pipe( pipe_slow );
21567 %}
21568 
// Doubles vector sqrt
21570 instruct vsqrtD_reg(vec dst, vec src) %{
21571   match(Set dst (SqrtVD src));
21572   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21573   ins_encode %{
21574     assert(UseAVX > 0, "required");
21575     int vlen_enc = vector_length_encoding(this);
21576     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21577   %}
21578   ins_pipe( pipe_slow );
21579 %}
21580 
21581 instruct vsqrtD_mem(vec dst, memory mem) %{
21582   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21583   match(Set dst (SqrtVD (LoadVector mem)));
21584   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21585   ins_encode %{
21586     assert(UseAVX > 0, "required");
21587     int vlen_enc = vector_length_encoding(this);
21588     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21589   %}
21590   ins_pipe( pipe_slow );
21591 %}
21592 
21593 // ------------------------------ Shift ---------------------------------------
21594 
21595 // Left and right shift count vectors are the same on x86
21596 // (only lowest bits of xmm reg are used for count).
21597 instruct vshiftcnt(vec dst, rRegI cnt) %{
21598   match(Set dst (LShiftCntV cnt));
21599   match(Set dst (RShiftCntV cnt));
21600   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21601   ins_encode %{
21602     __ movdl($dst$$XMMRegister, $cnt$$Register);
21603   %}
21604   ins_pipe( pipe_slow );
21605 %}
21606 
21607 // Byte vector shift
21608 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21609   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21610   match(Set dst ( LShiftVB src shift));
21611   match(Set dst ( RShiftVB src shift));
21612   match(Set dst (URShiftVB src shift));
21613   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21614   format %{"vector_byte_shift $dst,$src,$shift" %}
21615   ins_encode %{
21616     assert(UseSSE > 3, "required");
21617     int opcode = this->ideal_Opcode();
21618     bool sign = (opcode != Op_URShiftVB);
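    // Extend bytes to words, shift as words, then mask off the high byte of
    // each word and pack back down to bytes.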
21619     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21620     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21621     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21622     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21623     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21624   %}
21625   ins_pipe( pipe_slow );
21626 %}
21627 
21628 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21629   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21630             UseAVX <= 1);
21631   match(Set dst ( LShiftVB src shift));
21632   match(Set dst ( RShiftVB src shift));
21633   match(Set dst (URShiftVB src shift));
21634   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21635   format %{"vector_byte_shift $dst,$src,$shift" %}
21636   ins_encode %{
21637     assert(UseSSE > 3, "required");
21638     int opcode = this->ideal_Opcode();
21639     bool sign = (opcode != Op_URShiftVB);
21640     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21641     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21642     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21643     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21644     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21645     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21646     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21647     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21648     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21649   %}
21650   ins_pipe( pipe_slow );
21651 %}
21652 
21653 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21654   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21655             UseAVX > 1);
21656   match(Set dst ( LShiftVB src shift));
21657   match(Set dst ( RShiftVB src shift));
21658   match(Set dst (URShiftVB src shift));
21659   effect(TEMP dst, TEMP tmp);
21660   format %{"vector_byte_shift $dst,$src,$shift" %}
21661   ins_encode %{
21662     int opcode = this->ideal_Opcode();
21663     bool sign = (opcode != Op_URShiftVB);
21664     int vlen_enc = Assembler::AVX_256bit;
21665     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21666     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21667     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21668     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21669     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21670   %}
21671   ins_pipe( pipe_slow );
21672 %}
21673 
21674 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21675   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21676   match(Set dst ( LShiftVB src shift));
21677   match(Set dst ( RShiftVB src shift));
21678   match(Set dst (URShiftVB src shift));
21679   effect(TEMP dst, TEMP tmp);
21680   format %{"vector_byte_shift $dst,$src,$shift" %}
21681   ins_encode %{
21682     assert(UseAVX > 1, "required");
21683     int opcode = this->ideal_Opcode();
21684     bool sign = (opcode != Op_URShiftVB);
21685     int vlen_enc = Assembler::AVX_256bit;
21686     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21687     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21688     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21689     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21690     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21691     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21692     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21693     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21694     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21695   %}
21696   ins_pipe( pipe_slow );
21697 %}
21698 
21699 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21700   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21701   match(Set dst ( LShiftVB src shift));
21702   match(Set dst  (RShiftVB src shift));
21703   match(Set dst (URShiftVB src shift));
21704   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21705   format %{"vector_byte_shift $dst,$src,$shift" %}
21706   ins_encode %{
21707     assert(UseAVX > 2, "required");
21708     int opcode = this->ideal_Opcode();
21709     bool sign = (opcode != Op_URShiftVB);
21710     int vlen_enc = Assembler::AVX_512bit;
21711     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21712     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21713     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21714     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21715     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21716     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21717     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21718     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21719     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21720     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21721     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21722     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21723   %}
21724   ins_pipe( pipe_slow );
21725 %}
21726 
// A Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int with
// sign extension before the shift. Char vectors are fine since chars are
// unsigned values.
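// For example, a short lane holding -1 shifted logically right by 2 keeps its
// low 16 bits as 0xFFFF in Java ((0xFFFFFFFF >>> 2) = 0x3FFFFFFF), whereas a
// 16-bit psrlw on the same lane would produce 0x3FFF.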
// Shorts/Chars vector shift
21732 instruct vshiftS(vec dst, vec src, vec shift) %{
21733   predicate(!n->as_ShiftV()->is_var_shift());
21734   match(Set dst ( LShiftVS src shift));
21735   match(Set dst ( RShiftVS src shift));
21736   match(Set dst (URShiftVS src shift));
21737   effect(TEMP dst, USE src, USE shift);
21738   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21739   ins_encode %{
21740     int opcode = this->ideal_Opcode();
21741     if (UseAVX > 0) {
21742       int vlen_enc = vector_length_encoding(this);
21743       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21744     } else {
21745       int vlen = Matcher::vector_length(this);
21746       if (vlen == 2) {
21747         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21748         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21749       } else if (vlen == 4) {
21750         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21751         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21752       } else {
21753         assert (vlen == 8, "sanity");
21754         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21755         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21756       }
21757     }
21758   %}
21759   ins_pipe( pipe_slow );
21760 %}
21761 
// Integers vector shift
21763 instruct vshiftI(vec dst, vec src, vec shift) %{
21764   predicate(!n->as_ShiftV()->is_var_shift());
21765   match(Set dst ( LShiftVI src shift));
21766   match(Set dst ( RShiftVI src shift));
21767   match(Set dst (URShiftVI src shift));
21768   effect(TEMP dst, USE src, USE shift);
21769   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21770   ins_encode %{
21771     int opcode = this->ideal_Opcode();
21772     if (UseAVX > 0) {
21773       int vlen_enc = vector_length_encoding(this);
21774       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21775     } else {
21776       int vlen = Matcher::vector_length(this);
21777       if (vlen == 2) {
21778         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21779         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21780       } else {
21781         assert(vlen == 4, "sanity");
21782         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21783         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21784       }
21785     }
21786   %}
21787   ins_pipe( pipe_slow );
21788 %}
21789 
// Integers vector constant shift
21791 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21792   match(Set dst (LShiftVI src (LShiftCntV shift)));
21793   match(Set dst (RShiftVI src (RShiftCntV shift)));
21794   match(Set dst (URShiftVI src (RShiftCntV shift)));
21795   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21796   ins_encode %{
21797     int opcode = this->ideal_Opcode();
21798     if (UseAVX > 0) {
21799       int vector_len = vector_length_encoding(this);
21800       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21801     } else {
21802       int vlen = Matcher::vector_length(this);
21803       if (vlen == 2) {
21804         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21805         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21806       } else {
21807         assert(vlen == 4, "sanity");
21808         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21809         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21810       }
21811     }
21812   %}
21813   ins_pipe( pipe_slow );
21814 %}
21815 
21816 // Longs vector shift
21817 instruct vshiftL(vec dst, vec src, vec shift) %{
21818   predicate(!n->as_ShiftV()->is_var_shift());
21819   match(Set dst ( LShiftVL src shift));
21820   match(Set dst (URShiftVL src shift));
21821   effect(TEMP dst, USE src, USE shift);
21822   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21823   ins_encode %{
21824     int opcode = this->ideal_Opcode();
21825     if (UseAVX > 0) {
21826       int vlen_enc = vector_length_encoding(this);
21827       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21828     } else {
21829       assert(Matcher::vector_length(this) == 2, "");
21830       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21831       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21832     }
21833   %}
21834   ins_pipe( pipe_slow );
21835 %}
21836 
21837 // Longs vector constant shift
21838 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21839   match(Set dst (LShiftVL src (LShiftCntV shift)));
21840   match(Set dst (URShiftVL src (RShiftCntV shift)));
21841   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21842   ins_encode %{
21843     int opcode = this->ideal_Opcode();
21844     if (UseAVX > 0) {
21845       int vector_len = vector_length_encoding(this);
21846       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21847     } else {
21848       assert(Matcher::vector_length(this) == 2, "");
21849       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21850       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21851     }
21852   %}
21853   ins_pipe( pipe_slow );
21854 %}
21855 
21856 // -------------------ArithmeticRightShift -----------------------------------
21857 // Long vector arithmetic right shift
21858 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21859   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21860   match(Set dst (RShiftVL src shift));
21861   effect(TEMP dst, TEMP tmp);
21862   format %{ "vshiftq $dst,$src,$shift" %}
21863   ins_encode %{
21864     uint vlen = Matcher::vector_length(this);
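    // There is no 64-bit arithmetic right shift below AVX-512; shift logically
    // and then sign-extend by xor/subtract with the shifted sign-bit mask.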
21865     if (vlen == 2) {
21866       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21867       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21868       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21869       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21870       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21871       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21872     } else {
21873       assert(vlen == 4, "sanity");
21874       assert(UseAVX > 1, "required");
21875       int vlen_enc = Assembler::AVX_256bit;
21876       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21877       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21878       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21879       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21880       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21881     }
21882   %}
21883   ins_pipe( pipe_slow );
21884 %}
21885 
21886 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21887   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21888   match(Set dst (RShiftVL src shift));
21889   format %{ "vshiftq $dst,$src,$shift" %}
21890   ins_encode %{
21891     int vlen_enc = vector_length_encoding(this);
21892     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21893   %}
21894   ins_pipe( pipe_slow );
21895 %}
21896 
21897 // ------------------- Variable Shift -----------------------------
21898 // Byte variable shift
21899 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21900   predicate(Matcher::vector_length(n) <= 8 &&
21901             n->as_ShiftV()->is_var_shift() &&
21902             !VM_Version::supports_avx512bw());
21903   match(Set dst ( LShiftVB src shift));
21904   match(Set dst ( RShiftVB src shift));
21905   match(Set dst (URShiftVB src shift));
21906   effect(TEMP dst, TEMP vtmp);
21907   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21908   ins_encode %{
21909     assert(UseAVX >= 2, "required");
21910 
21911     int opcode = this->ideal_Opcode();
21912     int vlen_enc = Assembler::AVX_128bit;
21913     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21914     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21915   %}
21916   ins_pipe( pipe_slow );
21917 %}
21918 
21919 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21920   predicate(Matcher::vector_length(n) == 16 &&
21921             n->as_ShiftV()->is_var_shift() &&
21922             !VM_Version::supports_avx512bw());
21923   match(Set dst ( LShiftVB src shift));
21924   match(Set dst ( RShiftVB src shift));
21925   match(Set dst (URShiftVB src shift));
21926   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21927   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21928   ins_encode %{
21929     assert(UseAVX >= 2, "required");
21930 
21931     int opcode = this->ideal_Opcode();
21932     int vlen_enc = Assembler::AVX_128bit;
21933     // Shift lower half and get word result in dst
21934     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21935 
21936     // Shift upper half and get word result in vtmp1
21937     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21938     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21939     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21940 
21941     // Merge and down convert the two word results to byte in dst
21942     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21943   %}
21944   ins_pipe( pipe_slow );
21945 %}
21946 
21947 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21948   predicate(Matcher::vector_length(n) == 32 &&
21949             n->as_ShiftV()->is_var_shift() &&
21950             !VM_Version::supports_avx512bw());
21951   match(Set dst ( LShiftVB src shift));
21952   match(Set dst ( RShiftVB src shift));
21953   match(Set dst (URShiftVB src shift));
21954   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21955   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21956   ins_encode %{
21957     assert(UseAVX >= 2, "required");
21958 
21959     int opcode = this->ideal_Opcode();
21960     int vlen_enc = Assembler::AVX_128bit;
21961     // Process lower 128 bits and get result in dst
21962     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21963     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21964     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21965     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21966     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21967 
21968     // Process higher 128 bits and get result in vtmp3
21969     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21970     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21971     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21972     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21973     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21974     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21975     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21976 
21977     // Merge the two results in dst
21978     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21979   %}
21980   ins_pipe( pipe_slow );
21981 %}
21982 
21983 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21984   predicate(Matcher::vector_length(n) <= 32 &&
21985             n->as_ShiftV()->is_var_shift() &&
21986             VM_Version::supports_avx512bw());
21987   match(Set dst ( LShiftVB src shift));
21988   match(Set dst ( RShiftVB src shift));
21989   match(Set dst (URShiftVB src shift));
21990   effect(TEMP dst, TEMP vtmp);
21991   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21992   ins_encode %{
21993     assert(UseAVX > 2, "required");
21994 
21995     int opcode = this->ideal_Opcode();
21996     int vlen_enc = vector_length_encoding(this);
21997     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21998   %}
21999   ins_pipe( pipe_slow );
22000 %}
22001 
22002 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
22003   predicate(Matcher::vector_length(n) == 64 &&
22004             n->as_ShiftV()->is_var_shift() &&
22005             VM_Version::supports_avx512bw());
22006   match(Set dst ( LShiftVB src shift));
22007   match(Set dst ( RShiftVB src shift));
22008   match(Set dst (URShiftVB src shift));
22009   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22010   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
22011   ins_encode %{
22012     assert(UseAVX > 2, "required");
22013 
22014     int opcode = this->ideal_Opcode();
22015     int vlen_enc = Assembler::AVX_256bit;
22016     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
22017     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
22018     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
22019     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
22020     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
22021   %}
22022   ins_pipe( pipe_slow );
22023 %}
22024 
22025 // Short variable shift
22026 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
22027   predicate(Matcher::vector_length(n) <= 8 &&
22028             n->as_ShiftV()->is_var_shift() &&
22029             !VM_Version::supports_avx512bw());
22030   match(Set dst ( LShiftVS src shift));
22031   match(Set dst ( RShiftVS src shift));
22032   match(Set dst (URShiftVS src shift));
22033   effect(TEMP dst, TEMP vtmp);
22034   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
22035   ins_encode %{
22036     assert(UseAVX >= 2, "required");
22037 
22038     int opcode = this->ideal_Opcode();
22039     bool sign = (opcode != Op_URShiftVS);
22040     int vlen_enc = Assembler::AVX_256bit;
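    // No variable 16-bit shift without AVX512BW: widen shorts to ints, use the
    // variable 32-bit shift, then mask and pack back down to shorts.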
22041     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
22042     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
22043     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22044     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22045     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
22046     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22047   %}
22048   ins_pipe( pipe_slow );
22049 %}
22050 
22051 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
22052   predicate(Matcher::vector_length(n) == 16 &&
22053             n->as_ShiftV()->is_var_shift() &&
22054             !VM_Version::supports_avx512bw());
22055   match(Set dst ( LShiftVS src shift));
22056   match(Set dst ( RShiftVS src shift));
22057   match(Set dst (URShiftVS src shift));
22058   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22059   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
22060   ins_encode %{
22061     assert(UseAVX >= 2, "required");
22062 
22063     int opcode = this->ideal_Opcode();
22064     bool sign = (opcode != Op_URShiftVS);
22065     int vlen_enc = Assembler::AVX_256bit;
22066     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
22067     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
22068     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22069     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22070     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22071 
22072     // Shift upper half, with result in dst using vtmp1 as TEMP
22073     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
22074     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
22075     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22076     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22077     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22078     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22079 
22080     // Merge lower and upper half result into dst
22081     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22082     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
22083   %}
22084   ins_pipe( pipe_slow );
22085 %}
22086 
22087 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
22088   predicate(n->as_ShiftV()->is_var_shift() &&
22089             VM_Version::supports_avx512bw());
22090   match(Set dst ( LShiftVS src shift));
22091   match(Set dst ( RShiftVS src shift));
22092   match(Set dst (URShiftVS src shift));
22093   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
22094   ins_encode %{
22095     assert(UseAVX > 2, "required");
22096 
22097     int opcode = this->ideal_Opcode();
22098     int vlen_enc = vector_length_encoding(this);
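          // Without AVX512VL the 128/256-bit EVEX forms of the variable word shift are unavailable;
          // fall back to the 512-bit form (the extra upper lanes are simply unused).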
22099     if (!VM_Version::supports_avx512vl()) {
22100       vlen_enc = Assembler::AVX_512bit;
22101     }
22102     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22103   %}
22104   ins_pipe( pipe_slow );
22105 %}
22106 
22107 // Integer variable shift
22108 instruct vshiftI_var(vec dst, vec src, vec shift) %{
22109   predicate(n->as_ShiftV()->is_var_shift());
22110   match(Set dst ( LShiftVI src shift));
22111   match(Set dst ( RShiftVI src shift));
22112   match(Set dst (URShiftVI src shift));
22113   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
22114   ins_encode %{
22115     assert(UseAVX >= 2, "required");
22116 
22117     int opcode = this->ideal_Opcode();
22118     int vlen_enc = vector_length_encoding(this);
22119     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22120   %}
22121   ins_pipe( pipe_slow );
22122 %}
22123 
22124 // Long variable shift
22125 instruct vshiftL_var(vec dst, vec src, vec shift) %{
22126   predicate(n->as_ShiftV()->is_var_shift());
22127   match(Set dst ( LShiftVL src shift));
22128   match(Set dst (URShiftVL src shift));
22129   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22130   ins_encode %{
22131     assert(UseAVX >= 2, "required");
22132 
22133     int opcode = this->ideal_Opcode();
22134     int vlen_enc = vector_length_encoding(this);
22135     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22136   %}
22137   ins_pipe( pipe_slow );
22138 %}
22139 
22140 // Long variable right shift arithmetic
22141 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
22142   predicate(Matcher::vector_length(n) <= 4 &&
22143             n->as_ShiftV()->is_var_shift() &&
22144             UseAVX == 2);
22145   match(Set dst (RShiftVL src shift));
22146   effect(TEMP dst, TEMP vtmp);
22147   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
22148   ins_encode %{
22149     int opcode = this->ideal_Opcode();
22150     int vlen_enc = vector_length_encoding(this);
22151     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
22152                  $vtmp$$XMMRegister);
22153   %}
22154   ins_pipe( pipe_slow );
22155 %}
22156 
22157 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
22158   predicate(n->as_ShiftV()->is_var_shift() &&
22159             UseAVX > 2);
22160   match(Set dst (RShiftVL src shift));
22161   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22162   ins_encode %{
22163     int opcode = this->ideal_Opcode();
22164     int vlen_enc = vector_length_encoding(this);
22165     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22166   %}
22167   ins_pipe( pipe_slow );
22168 %}
22169 
22170 // --------------------------------- AND --------------------------------------
22171 
22172 instruct vand(vec dst, vec src) %{
22173   predicate(UseAVX == 0);
22174   match(Set dst (AndV dst src));
22175   format %{ "pand    $dst,$src\t! and vectors" %}
22176   ins_encode %{
22177     __ pand($dst$$XMMRegister, $src$$XMMRegister);
22178   %}
22179   ins_pipe( pipe_slow );
22180 %}
22181 
22182 instruct vand_reg(vec dst, vec src1, vec src2) %{
22183   predicate(UseAVX > 0);
22184   match(Set dst (AndV src1 src2));
22185   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
22186   ins_encode %{
22187     int vlen_enc = vector_length_encoding(this);
22188     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22189   %}
22190   ins_pipe( pipe_slow );
22191 %}
22192 
22193 instruct vand_mem(vec dst, vec src, memory mem) %{
22194   predicate((UseAVX > 0) &&
22195             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22196   match(Set dst (AndV src (LoadVector mem)));
22197   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
22198   ins_encode %{
22199     int vlen_enc = vector_length_encoding(this);
22200     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22201   %}
22202   ins_pipe( pipe_slow );
22203 %}
22204 
22205 // --------------------------------- OR ---------------------------------------
22206 
22207 instruct vor(vec dst, vec src) %{
22208   predicate(UseAVX == 0);
22209   match(Set dst (OrV dst src));
22210   format %{ "por     $dst,$src\t! or vectors" %}
22211   ins_encode %{
22212     __ por($dst$$XMMRegister, $src$$XMMRegister);
22213   %}
22214   ins_pipe( pipe_slow );
22215 %}
22216 
22217 instruct vor_reg(vec dst, vec src1, vec src2) %{
22218   predicate(UseAVX > 0);
22219   match(Set dst (OrV src1 src2));
22220   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
22221   ins_encode %{
22222     int vlen_enc = vector_length_encoding(this);
22223     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22224   %}
22225   ins_pipe( pipe_slow );
22226 %}
22227 
22228 instruct vor_mem(vec dst, vec src, memory mem) %{
22229   predicate((UseAVX > 0) &&
22230             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22231   match(Set dst (OrV src (LoadVector mem)));
22232   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
22233   ins_encode %{
22234     int vlen_enc = vector_length_encoding(this);
22235     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22236   %}
22237   ins_pipe( pipe_slow );
22238 %}
22239 
22240 // --------------------------------- XOR --------------------------------------
22241 
22242 instruct vxor(vec dst, vec src) %{
22243   predicate(UseAVX == 0);
22244   match(Set dst (XorV dst src));
22245   format %{ "pxor    $dst,$src\t! xor vectors" %}
22246   ins_encode %{
22247     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22248   %}
22249   ins_pipe( pipe_slow );
22250 %}
22251 
22252 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22253   predicate(UseAVX > 0);
22254   match(Set dst (XorV src1 src2));
22255   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
22256   ins_encode %{
22257     int vlen_enc = vector_length_encoding(this);
22258     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22259   %}
22260   ins_pipe( pipe_slow );
22261 %}
22262 
22263 instruct vxor_mem(vec dst, vec src, memory mem) %{
22264   predicate((UseAVX > 0) &&
22265             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22266   match(Set dst (XorV src (LoadVector mem)));
22267   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
22268   ins_encode %{
22269     int vlen_enc = vector_length_encoding(this);
22270     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22271   %}
22272   ins_pipe( pipe_slow );
22273 %}
22274 
22275 // --------------------------------- VectorCast --------------------------------------
22276 
22277 instruct vcastBtoX(vec dst, vec src) %{
22278   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22279   match(Set dst (VectorCastB2X src));
22280   format %{ "vector_cast_b2x $dst,$src\t!" %}
22281   ins_encode %{
22282     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22283     int vlen_enc = vector_length_encoding(this);
22284     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22285   %}
22286   ins_pipe( pipe_slow );
22287 %}
22288 
22289 instruct vcastBtoD(legVec dst, legVec src) %{
22290   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22291   match(Set dst (VectorCastB2X src));
22292   format %{ "vector_cast_b2x $dst,$src\t!" %}
22293   ins_encode %{
22294     int vlen_enc = vector_length_encoding(this);
22295     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22296   %}
22297   ins_pipe( pipe_slow );
22298 %}
22299 
22300 instruct castStoX(vec dst, vec src) %{
22301   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22302             Matcher::vector_length(n->in(1)) <= 8 && // src
22303             Matcher::vector_element_basic_type(n) == T_BYTE);
22304   match(Set dst (VectorCastS2X src));
22305   format %{ "vector_cast_s2x $dst,$src" %}
22306   ins_encode %{
22307     assert(UseAVX > 0, "required");
22308 
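          // Mask each short down to its low byte, then pack word pairs to bytes
          // (values already fit in a byte, so the unsigned-saturating pack cannot saturate).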
22309     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22310     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22311   %}
22312   ins_pipe( pipe_slow );
22313 %}
22314 
22315 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22316   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22317             Matcher::vector_length(n->in(1)) == 16 && // src
22318             Matcher::vector_element_basic_type(n) == T_BYTE);
22319   effect(TEMP dst, TEMP vtmp);
22320   match(Set dst (VectorCastS2X src));
22321   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22322   ins_encode %{
22323     assert(UseAVX > 0, "required");
22324 
22325     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22326     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22327     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22328     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22329   %}
22330   ins_pipe( pipe_slow );
22331 %}
22332 
22333 instruct vcastStoX_evex(vec dst, vec src) %{
22334   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22335             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22336   match(Set dst (VectorCastS2X src));
22337   format %{ "vector_cast_s2x $dst,$src\t!" %}
22338   ins_encode %{
22339     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22340     int src_vlen_enc = vector_length_encoding(this, $src);
22341     int vlen_enc = vector_length_encoding(this);
22342     switch (to_elem_bt) {
22343       case T_BYTE:
22344         if (!VM_Version::supports_avx512vl()) {
22345           vlen_enc = Assembler::AVX_512bit;
22346         }
22347         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22348         break;
22349       case T_INT:
22350         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22351         break;
22352       case T_FLOAT:
22353         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22354         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22355         break;
22356       case T_LONG:
22357         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22358         break;
22359       case T_DOUBLE: {
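          // vcvtdq2pd doubles the element width, so the intermediate int vector
          // only needs half the destination vector length.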
22360         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22361         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22362         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22363         break;
22364       }
22365       default:
22366         ShouldNotReachHere();
22367     }
22368   %}
22369   ins_pipe( pipe_slow );
22370 %}
22371 
22372 instruct castItoX(vec dst, vec src) %{
22373   predicate(UseAVX <= 2 &&
22374             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22375             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22376   match(Set dst (VectorCastI2X src));
22377   format %{ "vector_cast_i2x $dst,$src" %}
22378   ins_encode %{
22379     assert(UseAVX > 0, "required");
22380 
22381     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22382     int vlen_enc = vector_length_encoding(this, $src);
22383 
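          // Truncate the ints by masking to the target width first, so the
          // unsigned-saturating pack instructions cannot saturate.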
22384     if (to_elem_bt == T_BYTE) {
22385       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22386       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22387       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22388     } else {
22389       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22390       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22391       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22392     }
22393   %}
22394   ins_pipe( pipe_slow );
22395 %}
22396 
22397 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22398   predicate(UseAVX <= 2 &&
22399             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22400             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22401   match(Set dst (VectorCastI2X src));
22402   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22403   effect(TEMP dst, TEMP vtmp);
22404   ins_encode %{
22405     assert(UseAVX > 0, "required");
22406 
22407     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22408     int vlen_enc = vector_length_encoding(this, $src);
22409 
22410     if (to_elem_bt == T_BYTE) {
22411       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22412       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22413       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22414       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22415     } else {
22416       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22417       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22418       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22419       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22420     }
22421   %}
22422   ins_pipe( pipe_slow );
22423 %}
22424 
22425 instruct vcastItoX_evex(vec dst, vec src) %{
22426   predicate(UseAVX > 2 ||
22427             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22428   match(Set dst (VectorCastI2X src));
22429   format %{ "vector_cast_i2x $dst,$src\t!" %}
22430   ins_encode %{
22431     assert(UseAVX > 0, "required");
22432 
22433     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22434     int src_vlen_enc = vector_length_encoding(this, $src);
22435     int dst_vlen_enc = vector_length_encoding(this);
22436     switch (dst_elem_bt) {
22437       case T_BYTE:
22438         if (!VM_Version::supports_avx512vl()) {
22439           src_vlen_enc = Assembler::AVX_512bit;
22440         }
22441         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22442         break;
22443       case T_SHORT:
22444         if (!VM_Version::supports_avx512vl()) {
22445           src_vlen_enc = Assembler::AVX_512bit;
22446         }
22447         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22448         break;
22449       case T_FLOAT:
22450         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22451         break;
22452       case T_LONG:
22453         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22454         break;
22455       case T_DOUBLE:
22456         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22457         break;
22458       default:
22459         ShouldNotReachHere();
22460     }
22461   %}
22462   ins_pipe( pipe_slow );
22463 %}
22464 
22465 instruct vcastLtoBS(vec dst, vec src) %{
22466   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22467             UseAVX <= 2);
22468   match(Set dst (VectorCastL2X src));
22469   format %{ "vector_cast_l2x  $dst,$src" %}
22470   ins_encode %{
22471     assert(UseAVX > 0, "required");
22472 
22473     int vlen = Matcher::vector_length_in_bytes(this, $src);
22474     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22475     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22476                                                       : ExternalAddress(vector_int_to_short_mask());
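          // Gather the low 32 bits of each long into the lower lanes, then mask and
          // pack down to the target element size.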
22477     if (vlen <= 16) {
22478       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22479       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22480       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22481     } else {
22482       assert(vlen <= 32, "required");
22483       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22484       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22485       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22486       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22487     }
22488     if (to_elem_bt == T_BYTE) {
22489       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22490     }
22491   %}
22492   ins_pipe( pipe_slow );
22493 %}
22494 
22495 instruct vcastLtoX_evex(vec dst, vec src) %{
22496   predicate(UseAVX > 2 ||
22497             (Matcher::vector_element_basic_type(n) == T_INT ||
22498              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22499              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22500   match(Set dst (VectorCastL2X src));
22501   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22502   ins_encode %{
22503     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22504     int vlen = Matcher::vector_length_in_bytes(this, $src);
22505     int vlen_enc = vector_length_encoding(this, $src);
22506     switch (to_elem_bt) {
22507       case T_BYTE:
22508         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22509           vlen_enc = Assembler::AVX_512bit;
22510         }
22511         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22512         break;
22513       case T_SHORT:
22514         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22515           vlen_enc = Assembler::AVX_512bit;
22516         }
22517         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22518         break;
22519       case T_INT:
22520         if (vlen == 8) {
22521           if ($dst$$XMMRegister != $src$$XMMRegister) {
22522             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22523           }
22524         } else if (vlen == 16) {
22525           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22526         } else if (vlen == 32) {
22527           if (UseAVX > 2) {
22528             if (!VM_Version::supports_avx512vl()) {
22529               vlen_enc = Assembler::AVX_512bit;
22530             }
22531             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22532           } else {
22533             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22534             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22535           }
22536         } else { // vlen == 64
22537           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22538         }
22539         break;
22540       case T_FLOAT:
22541         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22542         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22543         break;
22544       case T_DOUBLE:
22545         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22546         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22547         break;
22548 
22549       default: assert(false, "%s", type2name(to_elem_bt));
22550     }
22551   %}
22552   ins_pipe( pipe_slow );
22553 %}
22554 
22555 instruct vcastFtoD_reg(vec dst, vec src) %{
22556   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22557   match(Set dst (VectorCastF2X src));
22558   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22559   ins_encode %{
22560     int vlen_enc = vector_length_encoding(this);
22561     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22562   %}
22563   ins_pipe( pipe_slow );
22564 %}
22565 
22566 
22567 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22568   predicate(!VM_Version::supports_avx10_2() &&
22569             !VM_Version::supports_avx512vl() &&
22570             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22571             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22572             is_integral_type(Matcher::vector_element_basic_type(n)));
22573   match(Set dst (VectorCastF2X src));
22574   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22575   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22576   ins_encode %{
22577     int vlen_enc = vector_length_encoding(this, $src);
22578     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22579     // JDK-8292878 removed the need for an explicit scratch register when loading greater-than-32-bit
22580     // addresses in register-indirect addressing mode, since stub constants are part of the code
22581     // cache and ReservedCodeCacheSize is currently capped at 2G.
22582     // Targets are free to raise this limit, but a code cache larger than 2G looks unreasonable
22583     // in practical scenarios. On the flip side, with the given cap we save a temporary register
22584     // allocation, which in the limiting case can prevent spilling in blocks with high register
22585     // pressure.
22586     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22587                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22588                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22589   %}
22590   ins_pipe( pipe_slow );
22591 %}
22592 
22593 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22594   predicate(!VM_Version::supports_avx10_2() &&
22595             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22596             is_integral_type(Matcher::vector_element_basic_type(n)));
22597   match(Set dst (VectorCastF2X src));
22598   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22599   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22600   ins_encode %{
22601     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22602     if (to_elem_bt == T_LONG) {
22603       int vlen_enc = vector_length_encoding(this);
22604       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22605                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22606                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22607     } else {
22608       int vlen_enc = vector_length_encoding(this, $src);
22609       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22610                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22611                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22612     }
22613   %}
22614   ins_pipe( pipe_slow );
22615 %}
22616 
22617 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22618   predicate(VM_Version::supports_avx10_2() &&
22619             is_integral_type(Matcher::vector_element_basic_type(n)));
22620   match(Set dst (VectorCastF2X src));
22621   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22622   ins_encode %{
22623     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22624     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22625     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22626   %}
22627   ins_pipe( pipe_slow );
22628 %}
22629 
22630 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22631   predicate(VM_Version::supports_avx10_2() &&
22632             is_integral_type(Matcher::vector_element_basic_type(n)));
22633   match(Set dst (VectorCastF2X (LoadVector src)));
22634   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22635   ins_encode %{
22636     int vlen = Matcher::vector_length(this);
22637     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22638     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22639     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22640   %}
22641   ins_pipe( pipe_slow );
22642 %}
22643 
22644 instruct vcastDtoF_reg(vec dst, vec src) %{
22645   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22646   match(Set dst (VectorCastD2X src));
22647   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22648   ins_encode %{
22649     int vlen_enc = vector_length_encoding(this, $src);
22650     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22651   %}
22652   ins_pipe( pipe_slow );
22653 %}
22654 
22655 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22656   predicate(!VM_Version::supports_avx10_2() &&
22657             !VM_Version::supports_avx512vl() &&
22658             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22659             is_integral_type(Matcher::vector_element_basic_type(n)));
22660   match(Set dst (VectorCastD2X src));
22661   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22662   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22663   ins_encode %{
22664     int vlen_enc = vector_length_encoding(this, $src);
22665     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22666     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22667                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22668                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22669   %}
22670   ins_pipe( pipe_slow );
22671 %}
22672 
22673 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22674   predicate(!VM_Version::supports_avx10_2() &&
22675             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22676             is_integral_type(Matcher::vector_element_basic_type(n)));
22677   match(Set dst (VectorCastD2X src));
22678   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22679   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22680   ins_encode %{
22681     int vlen_enc = vector_length_encoding(this, $src);
22682     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22683     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22684                               ExternalAddress(vector_float_signflip());
22685     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22686                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22687   %}
22688   ins_pipe( pipe_slow );
22689 %}
22690 
22691 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22692   predicate(VM_Version::supports_avx10_2() &&
22693             is_integral_type(Matcher::vector_element_basic_type(n)));
22694   match(Set dst (VectorCastD2X src));
22695   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22696   ins_encode %{
22697     int vlen_enc = vector_length_encoding(this, $src);
22698     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22699     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22700   %}
22701   ins_pipe( pipe_slow );
22702 %}
22703 
22704 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22705   predicate(VM_Version::supports_avx10_2() &&
22706             is_integral_type(Matcher::vector_element_basic_type(n)));
22707   match(Set dst (VectorCastD2X (LoadVector src)));
22708   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22709   ins_encode %{
22710     int vlen = Matcher::vector_length(this);
22711     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22712     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22713     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22714   %}
22715   ins_pipe( pipe_slow );
22716 %}
22717 
22718 instruct vucast(vec dst, vec src) %{
22719   match(Set dst (VectorUCastB2X src));
22720   match(Set dst (VectorUCastS2X src));
22721   match(Set dst (VectorUCastI2X src));
22722   format %{ "vector_ucast $dst,$src\t!" %}
22723   ins_encode %{
22724     assert(UseAVX > 0, "required");
22725 
22726     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22727     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22728     int vlen_enc = vector_length_encoding(this);
22729     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22730   %}
22731   ins_pipe( pipe_slow );
22732 %}
22733 
22734 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22735   predicate(!VM_Version::supports_avx512vl() &&
22736             Matcher::vector_length_in_bytes(n) < 64 &&
22737             Matcher::vector_element_basic_type(n) == T_INT);
22738   match(Set dst (RoundVF src));
22739   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22740   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22741   ins_encode %{
22742     int vlen_enc = vector_length_encoding(this);
22743     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22744     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22745                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22746                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22747   %}
22748   ins_pipe( pipe_slow );
22749 %}
22750 
22751 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22752   predicate((VM_Version::supports_avx512vl() ||
22753              Matcher::vector_length_in_bytes(n) == 64) &&
22754              Matcher::vector_element_basic_type(n) == T_INT);
22755   match(Set dst (RoundVF src));
22756   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22757   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22758   ins_encode %{
22759     int vlen_enc = vector_length_encoding(this);
22760     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22761     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22762                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22763                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22764   %}
22765   ins_pipe( pipe_slow );
22766 %}
22767 
22768 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22769   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22770   match(Set dst (RoundVD src));
22771   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22772   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22773   ins_encode %{
22774     int vlen_enc = vector_length_encoding(this);
22775     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22776     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22777                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22778                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22779   %}
22780   ins_pipe( pipe_slow );
22781 %}
22782 
22783 // --------------------------------- VectorMaskCmp --------------------------------------
22784 
22785 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22786   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22787             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22788             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22789             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22790   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22791   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22792   ins_encode %{
22793     int vlen_enc = vector_length_encoding(this, $src1);
22794     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22795     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22796       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22797     } else {
22798       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22799     }
22800   %}
22801   ins_pipe( pipe_slow );
22802 %}
22803 
22804 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22805   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22806             n->bottom_type()->isa_vectmask() == nullptr &&
22807             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22808   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22809   effect(TEMP ktmp);
22810   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22811   ins_encode %{
22812     int vlen_enc = Assembler::AVX_512bit;
22813     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22814     KRegister mask = k0; // The comparison itself is not being masked.
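          // Compare into ktmp, then expand the k-mask into all-ones / all-zeroes lanes in dst.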
22815     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22816       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22817       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22818     } else {
22819       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22820       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22821     }
22822   %}
22823   ins_pipe( pipe_slow );
22824 %}
22825 
22826 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22827   predicate(n->bottom_type()->isa_vectmask() &&
22828             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22829   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22830   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22831   ins_encode %{
22832     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22833     int vlen_enc = vector_length_encoding(this, $src1);
22834     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22835     KRegister mask = k0; // The comparison itself is not being masked.
22836     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22837       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22838     } else {
22839       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22840     }
22841   %}
22842   ins_pipe( pipe_slow );
22843 %}
22844 
22845 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22846   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22847             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22848             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22849             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22850             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22851             (n->in(2)->get_int() == BoolTest::eq ||
22852              n->in(2)->get_int() == BoolTest::lt ||
22853              n->in(2)->get_int() == BoolTest::gt)); // cond
22854   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22855   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22856   ins_encode %{
22857     int vlen_enc = vector_length_encoding(this, $src1);
22858     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22859     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22860     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22861   %}
22862   ins_pipe( pipe_slow );
22863 %}
22864 
22865 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22866   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22867             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22868             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22869             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22870             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22871             (n->in(2)->get_int() == BoolTest::ne ||
22872              n->in(2)->get_int() == BoolTest::le ||
22873              n->in(2)->get_int() == BoolTest::ge)); // cond
22874   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22875   effect(TEMP dst, TEMP xtmp);
22876   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22877   ins_encode %{
22878     int vlen_enc = vector_length_encoding(this, $src1);
22879     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22880     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22881     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22882   %}
22883   ins_pipe( pipe_slow );
22884 %}
22885 
22886 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22887   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22888             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22889             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22890             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22891             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22892   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22893   effect(TEMP dst, TEMP xtmp);
22894   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22895   ins_encode %{
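          // AVX/AVX2 lack unsigned integer compares: flip the sign bit of both operands
          // so a signed compare produces the unsigned result.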
22896     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22897     int vlen_enc = vector_length_encoding(this, $src1);
22898     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22899     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22900 
22901     if (vlen_enc == Assembler::AVX_128bit) {
22902       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22903     } else {
22904       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22905     }
22906     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22907     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22908     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22909   %}
22910   ins_pipe( pipe_slow );
22911 %}
22912 
22913 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22914   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22915              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22916              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22917   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22918   effect(TEMP ktmp);
22919   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22920   ins_encode %{
22921     assert(UseAVX > 2, "required");
22922 
22923     int vlen_enc = vector_length_encoding(this, $src1);
22924     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22925     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22926     KRegister mask = k0; // The comparison itself is not being masked.
22927     bool merge = false;
22928     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22929 
22930     switch (src1_elem_bt) {
22931       case T_INT: {
22932         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22933         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22934         break;
22935       }
22936       case T_LONG: {
22937         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22938         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22939         break;
22940       }
22941       default: assert(false, "%s", type2name(src1_elem_bt));
22942     }
22943   %}
22944   ins_pipe( pipe_slow );
22945 %}
22946 
22947 
22948 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22949   predicate(n->bottom_type()->isa_vectmask() &&
22950             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22951   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22952   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22953   ins_encode %{
22954     assert(UseAVX > 2, "required");
22955     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22956 
22957     int vlen_enc = vector_length_encoding(this, $src1);
22958     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22959     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22960     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22961 
22962     // Dispatch on the element type of src1
22963     switch (src1_elem_bt) {
22964       case T_BYTE: {
22965         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22966         break;
22967       }
22968       case T_SHORT: {
22969         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22970         break;
22971       }
22972       case T_INT: {
22973         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22974         break;
22975       }
22976       case T_LONG: {
22977         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22978         break;
22979       }
22980       default: assert(false, "%s", type2name(src1_elem_bt));
22981     }
22982   %}
22983   ins_pipe( pipe_slow );
22984 %}
22985 
22986 // Extract
22987 
22988 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22989   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22990   match(Set dst (ExtractI src idx));
22991   match(Set dst (ExtractS src idx));
22992   match(Set dst (ExtractB src idx));
22993   format %{ "extractI $dst,$src,$idx\t!" %}
22994   ins_encode %{
22995     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22996 
22997     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22998     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22999   %}
23000   ins_pipe( pipe_slow );
23001 %}
23002 
23003 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
23004   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
23005             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
23006   match(Set dst (ExtractI src idx));
23007   match(Set dst (ExtractS src idx));
23008   match(Set dst (ExtractB src idx));
23009   effect(TEMP vtmp);
23010   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
23011   ins_encode %{
23012     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23013 
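          // Move the 128-bit lane containing the element into vtmp, then extract the element from that lane.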
23014     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
23015     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23016     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
23017   %}
23018   ins_pipe( pipe_slow );
23019 %}
23020 
23021 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
23022   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
23023   match(Set dst (ExtractL src idx));
23024   format %{ "extractL $dst,$src,$idx\t!" %}
23025   ins_encode %{
23026     assert(UseSSE >= 4, "required");
23027     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23028 
23029     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
23030   %}
23031   ins_pipe( pipe_slow );
23032 %}
23033 
23034 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
23035   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23036             Matcher::vector_length(n->in(1)) == 8);  // src
23037   match(Set dst (ExtractL src idx));
23038   effect(TEMP vtmp);
23039   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
23040   ins_encode %{
23041     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23042 
23043     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23044     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
23045   %}
23046   ins_pipe( pipe_slow );
23047 %}
23048 
23049 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23050   predicate(Matcher::vector_length(n->in(1)) <= 4);
23051   match(Set dst (ExtractF src idx));
23052   effect(TEMP dst, TEMP vtmp);
23053   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23054   ins_encode %{
23055     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23056 
23057     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
23058   %}
23059   ins_pipe( pipe_slow );
23060 %}
23061 
23062 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23063   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
23064             Matcher::vector_length(n->in(1)/*src*/) == 16);
23065   match(Set dst (ExtractF src idx));
23066   effect(TEMP vtmp);
23067   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23068   ins_encode %{
23069     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23070 
23071     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23072     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
23073   %}
23074   ins_pipe( pipe_slow );
23075 %}
23076 
23077 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
23078   predicate(Matcher::vector_length(n->in(1)) == 2); // src
23079   match(Set dst (ExtractD src idx));
23080   format %{ "extractD $dst,$src,$idx\t!" %}
23081   ins_encode %{
23082     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23083 
23084     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23085   %}
23086   ins_pipe( pipe_slow );
23087 %}
23088 
23089 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
23090   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23091             Matcher::vector_length(n->in(1)) == 8);  // src
23092   match(Set dst (ExtractD src idx));
23093   effect(TEMP vtmp);
23094   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
23095   ins_encode %{
23096     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23097 
23098     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23099     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
23100   %}
23101   ins_pipe( pipe_slow );
23102 %}
23103 
23104 // --------------------------------- Vector Blend --------------------------------------
23105 
23106 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
23107   predicate(UseAVX == 0);
23108   match(Set dst (VectorBlend (Binary dst src) mask));
23109   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
23110   effect(TEMP tmp);
23111   ins_encode %{
23112     assert(UseSSE >= 4, "required");
23113 
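          // SSE4.1 pblendvb reads its mask implicitly from xmm0; the rxmm0 tmp operand pins xmm0
          // so the mask can be copied there first.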
23114     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
23115       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
23116     }
23117     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
23118   %}
23119   ins_pipe( pipe_slow );
23120 %}
23121 
23122 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
23123   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23124             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23125             Matcher::vector_length_in_bytes(n) <= 32 &&
23126             is_integral_type(Matcher::vector_element_basic_type(n)));
23127   match(Set dst (VectorBlend (Binary src1 src2) mask));
23128   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23129   ins_encode %{
23130     int vlen_enc = vector_length_encoding(this);
23131     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23132   %}
23133   ins_pipe( pipe_slow );
23134 %}
23135 
23136 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
23137   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23138             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23139             Matcher::vector_length_in_bytes(n) <= 32 &&
23140             !is_integral_type(Matcher::vector_element_basic_type(n)));
23141   match(Set dst (VectorBlend (Binary src1 src2) mask));
23142   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23143   ins_encode %{
23144     int vlen_enc = vector_length_encoding(this);
23145     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23146   %}
23147   ins_pipe( pipe_slow );
23148 %}
23149 
23150 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
23151   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
23152             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23153             Matcher::vector_length_in_bytes(n) <= 32);
23154   match(Set dst (VectorBlend (Binary src1 src2) mask));
23155   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
23156   effect(TEMP vtmp, TEMP dst);
23157   ins_encode %{
23158     int vlen_enc = vector_length_encoding(this);
23159     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
23160     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23161     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
23162   %}
23163   ins_pipe( pipe_slow );
23164 %}
23165 
23166 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
23167   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
23168             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
23169   match(Set dst (VectorBlend (Binary src1 src2) mask));
23170   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
23171   effect(TEMP ktmp);
23172   ins_encode %{
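          // Convert the all-ones/all-zeroes vector mask into a k-register mask, then do a masked blend.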
23173     int vlen_enc = Assembler::AVX_512bit;
23174     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23175     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
23176     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23177   %}
23178   ins_pipe( pipe_slow );
23179 %}
23180 
23181 
23182 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
23183   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
23184             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23185              VM_Version::supports_avx512bw()));
23186   match(Set dst (VectorBlend (Binary src1 src2) mask));
23187   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
23188   ins_encode %{
23189     int vlen_enc = vector_length_encoding(this);
23190     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23191     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23192   %}
23193   ins_pipe( pipe_slow );
23194 %}
23195 
23196 // --------------------------------- ABS --------------------------------------
23197 // a = |a|
23198 instruct vabsB_reg(vec dst, vec src) %{
23199   match(Set dst (AbsVB  src));
23200   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23201   ins_encode %{
23202     uint vlen = Matcher::vector_length(this);
23203     if (vlen <= 16) {
23204       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23205     } else {
23206       int vlen_enc = vector_length_encoding(this);
23207       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23208     }
23209   %}
23210   ins_pipe( pipe_slow );
23211 %}
23212 
23213 instruct vabsS_reg(vec dst, vec src) %{
23214   match(Set dst (AbsVS  src));
23215   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23216   ins_encode %{
23217     uint vlen = Matcher::vector_length(this);
23218     if (vlen <= 8) {
23219       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23220     } else {
23221       int vlen_enc = vector_length_encoding(this);
23222       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23223     }
23224   %}
23225   ins_pipe( pipe_slow );
23226 %}
23227 
23228 instruct vabsI_reg(vec dst, vec src) %{
23229   match(Set dst (AbsVI  src));
23230   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23231   ins_encode %{
23232     uint vlen = Matcher::vector_length(this);
23233     if (vlen <= 4) {
23234       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23235     } else {
23236       int vlen_enc = vector_length_encoding(this);
23237       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23238     }
23239   %}
23240   ins_pipe( pipe_slow );
23241 %}
23242 
23243 instruct vabsL_reg(vec dst, vec src) %{
23244   match(Set dst (AbsVL  src));
23245   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23246   ins_encode %{
23247     assert(UseAVX > 2, "required");
23248     int vlen_enc = vector_length_encoding(this);
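    // evpabsq needs AVX512VL for 128/256-bit encodings; without VL fall back
    // to the full 512-bit encoding.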
23249     if (!VM_Version::supports_avx512vl()) {
23250       vlen_enc = Assembler::AVX_512bit;
23251     }
23252     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23253   %}
23254   ins_pipe( pipe_slow );
23255 %}
23256 
23257 // --------------------------------- ABSNEG --------------------------------------
23258 
23259 instruct vabsnegF(vec dst, vec src) %{
23260   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23261   match(Set dst (AbsVF src));
23262   match(Set dst (NegVF src));
23263   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23264   ins_cost(150);
23265   ins_encode %{
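    // The ideal opcode picks abs vs. neg; vabsnegf applies the matching
    // packed-float sign mask (the [mask] shown in the format string).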
23266     int opcode = this->ideal_Opcode();
23267     int vlen = Matcher::vector_length(this);
23268     if (vlen == 2) {
23269       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23270     } else {
23271       assert(vlen == 8 || vlen == 16, "required");
23272       int vlen_enc = vector_length_encoding(this);
23273       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23274     }
23275   %}
23276   ins_pipe( pipe_slow );
23277 %}
23278 
23279 instruct vabsneg4F(vec dst) %{
23280   predicate(Matcher::vector_length(n) == 4);
23281   match(Set dst (AbsVF dst));
23282   match(Set dst (NegVF dst));
23283   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23284   ins_cost(150);
23285   ins_encode %{
23286     int opcode = this->ideal_Opcode();
23287     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23288   %}
23289   ins_pipe( pipe_slow );
23290 %}
23291 
23292 instruct vabsnegD(vec dst, vec src) %{
23293   match(Set dst (AbsVD  src));
23294   match(Set dst (NegVD  src));
23295   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23296   ins_encode %{
23297     int opcode = this->ideal_Opcode();
23298     uint vlen = Matcher::vector_length(this);
23299     if (vlen == 2) {
23300       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23301     } else {
23302       int vlen_enc = vector_length_encoding(this);
23303       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23304     }
23305   %}
23306   ins_pipe( pipe_slow );
23307 %}
23308 
23309 //------------------------------------- VectorTest --------------------------------------------
23310 
23311 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23312   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23313   match(Set cr (VectorTest src1 src2));
23314   effect(TEMP vtmp);
23315   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
23316   ins_encode %{
23317     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23318     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23319     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23320   %}
23321   ins_pipe( pipe_slow );
23322 %}
23323 
23324 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23325   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23326   match(Set cr (VectorTest src1 src2));
23327   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23328   ins_encode %{
23329     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23330     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23331     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23332   %}
23333   ins_pipe( pipe_slow );
23334 %}
23335 
23336 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23337   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23338              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23339             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23340   match(Set cr (VectorTest src1 src2));
23341   effect(TEMP tmp);
23342   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23343   ins_encode %{
23344     uint masklen = Matcher::vector_length(this, $src1);
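    // Move the mask bits into a GPR, keep only the low masklen bits, and
    // compare against all-ones: equality means every lane is true.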
23345     __ kmovwl($tmp$$Register, $src1$$KRegister);
23346     __ andl($tmp$$Register, (1 << masklen) - 1);
23347     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23348   %}
23349   ins_pipe( pipe_slow );
23350 %}
23351 
23352 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23353   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23354              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23355             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23356   match(Set cr (VectorTest src1 src2));
23357   effect(TEMP tmp);
23358   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23359   ins_encode %{
23360     uint masklen = Matcher::vector_length(this, $src1);
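    // Move the mask bits into a GPR and AND with the low masklen bits;
    // ZF is clear iff at least one lane is true.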
23361     __ kmovwl($tmp$$Register, $src1$$KRegister);
23362     __ andl($tmp$$Register, (1 << masklen) - 1);
23363   %}
23364   ins_pipe( pipe_slow );
23365 %}
23366 
23367 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23368   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23369             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23370   match(Set cr (VectorTest src1 src2));
23371   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23372   ins_encode %{
23373     uint masklen = Matcher::vector_length(this, $src1);
23374     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23375   %}
23376   ins_pipe( pipe_slow );
23377 %}
23378 
23379 //------------------------------------- LoadMask --------------------------------------------
23380 
23381 instruct loadMask(legVec dst, legVec src) %{
23382   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23383   match(Set dst (VectorLoadMask src));
23384   effect(TEMP dst);
23385   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23386   ins_encode %{
23387     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23388     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23389     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23390   %}
23391   ins_pipe( pipe_slow );
23392 %}
23393 
23394 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23395   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23396   match(Set dst (VectorLoadMask src));
23397   effect(TEMP xtmp);
23398   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23399   ins_encode %{
23400     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23401                         true, Assembler::AVX_512bit);
23402   %}
23403   ins_pipe( pipe_slow );
23404 %}
23405 
23406 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
23407   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23408   match(Set dst (VectorLoadMask src));
23409   effect(TEMP xtmp);
23410   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23411   ins_encode %{
23412     int vlen_enc = vector_length_encoding(in(1));
23413     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23414                         false, vlen_enc);
23415   %}
23416   ins_pipe( pipe_slow );
23417 %}
23418 
23419 //------------------------------------- StoreMask --------------------------------------------
23420 
23421 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23422   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23423   match(Set dst (VectorStoreMask src size));
23424   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23425   ins_encode %{
23426     int vlen = Matcher::vector_length(this);
23427     if (vlen <= 16 && UseAVX <= 2) {
23428       assert(UseSSE >= 3, "required");
23429       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23430     } else {
23431       assert(UseAVX > 0, "required");
23432       int src_vlen_enc = vector_length_encoding(this, $src);
23433       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23434     }
23435   %}
23436   ins_pipe( pipe_slow );
23437 %}
23438 
23439 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23440   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23441   match(Set dst (VectorStoreMask src size));
23442   effect(TEMP_DEF dst, TEMP xtmp);
23443   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23444   ins_encode %{
23445     int vlen_enc = Assembler::AVX_128bit;
23446     int vlen = Matcher::vector_length(this);
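    // Narrow boolean shorts (0/-1) to bytes holding 0/1.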
23447     if (vlen <= 8) {
23448       assert(UseSSE >= 3, "required");
23449       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23450       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23451       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23452     } else {
23453       assert(UseAVX > 0, "required");
23454       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23455       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23456       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23457     }
23458   %}
23459   ins_pipe( pipe_slow );
23460 %}
23461 
23462 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23463   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23464   match(Set dst (VectorStoreMask src size));
23465   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23466   effect(TEMP_DEF dst, TEMP xtmp);
23467   ins_encode %{
23468     int vlen_enc = Assembler::AVX_128bit;
23469     int vlen = Matcher::vector_length(this);
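    // Narrow boolean ints (0/-1) to bytes holding 0/1.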
23470     if (vlen <= 4) {
23471       assert(UseSSE >= 3, "required");
23472       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23473       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23474       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23475       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23476     } else {
23477       assert(UseAVX > 0, "required");
23478       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23479       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23480       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23481       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23482       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23483     }
23484   %}
23485   ins_pipe( pipe_slow );
23486 %}
23487 
23488 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23489   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23490   match(Set dst (VectorStoreMask src size));
23491   effect(TEMP_DEF dst, TEMP xtmp);
23492   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23493   ins_encode %{
23494     assert(UseSSE >= 3, "required");
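    // Gather the low dword of each long lane, then narrow the 0/-1 dwords
    // to 0/1 bytes.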
23495     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23496     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23497     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23498     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23499     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23500   %}
23501   ins_pipe( pipe_slow );
23502 %}
23503 
23504 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23505   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23506   match(Set dst (VectorStoreMask src size));
23507   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23508   effect(TEMP_DEF dst, TEMP vtmp);
23509   ins_encode %{
23510     int vlen_enc = Assembler::AVX_128bit;
23511     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23512     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23513     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23514     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23515     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23516     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23517     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23518   %}
23519   ins_pipe( pipe_slow );
23520 %}
23521 
23522 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23523   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23524   match(Set dst (VectorStoreMask src size));
23525   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23526   ins_encode %{
23527     int src_vlen_enc = vector_length_encoding(this, $src);
23528     int dst_vlen_enc = vector_length_encoding(this);
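    // evpmovdb with a sub-512-bit source requires AVX512VL; widen the source
    // encoding to 512 bits when VL is unavailable.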
23529     if (!VM_Version::supports_avx512vl()) {
23530       src_vlen_enc = Assembler::AVX_512bit;
23531     }
23532     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23533     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23534   %}
23535   ins_pipe( pipe_slow );
23536 %}
23537 
23538 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23539   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23540   match(Set dst (VectorStoreMask src size));
23541   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23542   ins_encode %{
23543     int src_vlen_enc = vector_length_encoding(this, $src);
23544     int dst_vlen_enc = vector_length_encoding(this);
23545     if (!VM_Version::supports_avx512vl()) {
23546       src_vlen_enc = Assembler::AVX_512bit;
23547     }
23548     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23549     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23550   %}
23551   ins_pipe( pipe_slow );
23552 %}
23553 
23554 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23555   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23556   match(Set dst (VectorStoreMask mask size));
23557   effect(TEMP_DEF dst);
23558   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23559   ins_encode %{
23560     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
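    // Expand the k-mask into an int vector (zeroing unset lanes), then narrow
    // the dwords to bytes.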
23561     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23562                  false, Assembler::AVX_512bit, noreg);
23563     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23564   %}
23565   ins_pipe( pipe_slow );
23566 %}
23567 
23568 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23569   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23570   match(Set dst (VectorStoreMask mask size));
23571   effect(TEMP_DEF dst);
23572   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23573   ins_encode %{
23574     int dst_vlen_enc = vector_length_encoding(this);
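    // evpmovm2b expands the k-mask to 0/-1 bytes; vpabsb turns those into 0/1.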
23575     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23576     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23577   %}
23578   ins_pipe( pipe_slow );
23579 %}
23580 
23581 instruct vmaskcast_evex(kReg dst) %{
23582   match(Set dst (VectorMaskCast dst));
23583   ins_cost(0);
23584   format %{ "vector_mask_cast $dst" %}
23585   ins_encode %{
23586     // empty
23587   %}
23588   ins_pipe(empty);
23589 %}
23590 
23591 instruct vmaskcast(vec dst) %{
23592   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23593   match(Set dst (VectorMaskCast dst));
23594   ins_cost(0);
23595   format %{ "vector_mask_cast $dst" %}
23596   ins_encode %{
23597     // empty
23598   %}
23599   ins_pipe(empty);
23600 %}
23601 
23602 instruct vmaskcast_avx(vec dst, vec src) %{
23603   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23604   match(Set dst (VectorMaskCast src));
23605   format %{ "vector_mask_cast $dst, $src" %}
23606   ins_encode %{
23607     int vlen = Matcher::vector_length(this);
23608     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23609     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23610     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23611   %}
23612   ins_pipe(pipe_slow);
23613 %}
23614 
23615 //-------------------------------- Load Iota Indices ----------------------------------
23616 
23617 instruct loadIotaIndices(vec dst, immI_0 src) %{
23618   match(Set dst (VectorLoadConst src));
23619   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23620   ins_encode %{
23621      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23622      BasicType bt = Matcher::vector_element_basic_type(this);
23623      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23624   %}
23625   ins_pipe( pipe_slow );
23626 %}
23627 
23628 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23629   match(Set dst (PopulateIndex src1 src2));
23630   effect(TEMP dst, TEMP vtmp);
23631   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23632   ins_encode %{
23633      assert($src2$$constant == 1, "required");
23634      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23635      int vlen_enc = vector_length_encoding(this);
23636      BasicType elem_bt = Matcher::vector_element_basic_type(this);
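     // dst[i] = src1 + i: broadcast the start value, load the iota constant,
     // then add the two vectors.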
23637      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23638      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23639      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23640   %}
23641   ins_pipe( pipe_slow );
23642 %}
23643 
23644 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23645   match(Set dst (PopulateIndex src1 src2));
23646   effect(TEMP dst, TEMP vtmp);
23647   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23648   ins_encode %{
23649      assert($src2$$constant == 1, "required");
23650      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23651      int vlen_enc = vector_length_encoding(this);
23652      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23653      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23654      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23655      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23656   %}
23657   ins_pipe( pipe_slow );
23658 %}
23659 
23660 //-------------------------------- Rearrange ----------------------------------
23661 
23662 // LoadShuffle/Rearrange for Byte
23663 instruct rearrangeB(vec dst, vec shuffle) %{
23664   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23665             Matcher::vector_length(n) < 32);
23666   match(Set dst (VectorRearrange dst shuffle));
23667   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23668   ins_encode %{
23669     assert(UseSSE >= 4, "required");
23670     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23671   %}
23672   ins_pipe( pipe_slow );
23673 %}
23674 
23675 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23676   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23677             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23678   match(Set dst (VectorRearrange src shuffle));
23679   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23680   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23681   ins_encode %{
23682     assert(UseAVX >= 2, "required");
23683     // Swap src into vtmp1
23684     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23685     // Shuffle swapped src to get entries from other 128 bit lane
23686     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23687     // Shuffle original src to get entries from self 128 bit lane
23688     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23689     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23690     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23691     // Perform the blend
23692     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23693   %}
23694   ins_pipe( pipe_slow );
23695 %}
23696 
23697 
23698 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23699   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23700             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23701   match(Set dst (VectorRearrange src shuffle));
23702   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23703   format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23704   ins_encode %{
23705     int vlen_enc = vector_length_encoding(this);
23706     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23707                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23708                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23709   %}
23710   ins_pipe( pipe_slow );
23711 %}
23712 
23713 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23714   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23715             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23716   match(Set dst (VectorRearrange src shuffle));
23717   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23718   ins_encode %{
23719     int vlen_enc = vector_length_encoding(this);
23720     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23721   %}
23722   ins_pipe( pipe_slow );
23723 %}
23724 
23725 // LoadShuffle/Rearrange for Short
23726 
23727 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23728   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23729             !VM_Version::supports_avx512bw());
23730   match(Set dst (VectorLoadShuffle src));
23731   effect(TEMP dst, TEMP vtmp);
23732   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23733   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23736     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23737     if (UseAVX == 0) {
23738       assert(vlen_in_bytes <= 16, "required");
23739       // Multiply each shuffle by two to get byte index
23740       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23741       __ psllw($vtmp$$XMMRegister, 1);
23742 
23743       // Duplicate to create 2 copies of byte index
23744       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23745       __ psllw($dst$$XMMRegister, 8);
23746       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23747 
23748       // Add one to get alternate byte index
23749       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23750       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23751     } else {
23752       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23753       int vlen_enc = vector_length_encoding(this);
23754       // Multiply each shuffle by two to get byte index
23755       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23756 
23757       // Duplicate to create 2 copies of byte index
23758       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23759       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23760 
23761       // Add one to get alternate byte index
23762       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23763     }
23764   %}
23765   ins_pipe( pipe_slow );
23766 %}
23767 
23768 instruct rearrangeS(vec dst, vec shuffle) %{
23769   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23770             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23771   match(Set dst (VectorRearrange dst shuffle));
23772   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23773   ins_encode %{
23774     assert(UseSSE >= 4, "required");
23775     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23776   %}
23777   ins_pipe( pipe_slow );
23778 %}
23779 
23780 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23781   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23782             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23783   match(Set dst (VectorRearrange src shuffle));
23784   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23785   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23786   ins_encode %{
23787     assert(UseAVX >= 2, "required");
23788     // Swap src into vtmp1
23789     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23790     // Shuffle swapped src to get entries from other 128 bit lane
23791     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23792     // Shuffle original src to get entries from self 128 bit lane
23793     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23794     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23795     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23796     // Perform the blend
23797     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23798   %}
23799   ins_pipe( pipe_slow );
23800 %}
23801 
23802 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23803   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23804             VM_Version::supports_avx512bw());
23805   match(Set dst (VectorRearrange src shuffle));
23806   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23807   ins_encode %{
23808     int vlen_enc = vector_length_encoding(this);
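    // vpermw at 128/256 bits requires AVX512VL; use the 512-bit encoding otherwise.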
23809     if (!VM_Version::supports_avx512vl()) {
23810       vlen_enc = Assembler::AVX_512bit;
23811     }
23812     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23813   %}
23814   ins_pipe( pipe_slow );
23815 %}
23816 
23817 // LoadShuffle/Rearrange for Integer and Float
23818 
23819 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23820   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23821             Matcher::vector_length(n) == 4 && UseAVX == 0);
23822   match(Set dst (VectorLoadShuffle src));
23823   effect(TEMP dst, TEMP vtmp);
23824   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23825   ins_encode %{
23826     assert(UseSSE >= 4, "required");
23827 
    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23830 
23831     // Duplicate and multiply each shuffle by 4
23832     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23833     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23834     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23835     __ psllw($vtmp$$XMMRegister, 2);
23836 
23837     // Duplicate again to create 4 copies of byte index
23838     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23839     __ psllw($dst$$XMMRegister, 8);
23840     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23841 
23842     // Add 3,2,1,0 to get alternate byte index
23843     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23844     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23845   %}
23846   ins_pipe( pipe_slow );
23847 %}
23848 
23849 instruct rearrangeI(vec dst, vec shuffle) %{
23850   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23851             UseAVX == 0);
23852   match(Set dst (VectorRearrange dst shuffle));
23853   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23854   ins_encode %{
23855     assert(UseSSE >= 4, "required");
23856     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23857   %}
23858   ins_pipe( pipe_slow );
23859 %}
23860 
23861 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23862   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23863             UseAVX > 0);
23864   match(Set dst (VectorRearrange src shuffle));
23865   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23866   ins_encode %{
23867     int vlen_enc = vector_length_encoding(this);
23868     BasicType bt = Matcher::vector_element_basic_type(this);
23869     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23870   %}
23871   ins_pipe( pipe_slow );
23872 %}
23873 
23874 // LoadShuffle/Rearrange for Long and Double
23875 
23876 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23877   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23878             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23879   match(Set dst (VectorLoadShuffle src));
23880   effect(TEMP dst, TEMP vtmp);
23881   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23882   ins_encode %{
23883     assert(UseAVX >= 2, "required");
23884 
23885     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.
23888 
23889     // Multiply each shuffle by two to get double word index
23890     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23891 
23892     // Duplicate each double word shuffle
23893     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23894     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23895 
23896     // Add one to get alternate double word index
23897     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23898   %}
23899   ins_pipe( pipe_slow );
23900 %}
23901 
23902 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23903   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23904             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23905   match(Set dst (VectorRearrange src shuffle));
23906   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23907   ins_encode %{
23908     assert(UseAVX >= 2, "required");
23909 
23910     int vlen_enc = vector_length_encoding(this);
23911     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23912   %}
23913   ins_pipe( pipe_slow );
23914 %}
23915 
23916 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23917   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23918             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23919   match(Set dst (VectorRearrange src shuffle));
23920   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23921   ins_encode %{
23922     assert(UseAVX > 2, "required");
23923 
23924     int vlen_enc = vector_length_encoding(this);
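    // The variable-index vpermq form needs at least a 256-bit encoding.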
23925     if (vlen_enc == Assembler::AVX_128bit) {
23926       vlen_enc = Assembler::AVX_256bit;
23927     }
23928     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23929   %}
23930   ins_pipe( pipe_slow );
23931 %}
23932 
23933 // --------------------------------- FMA --------------------------------------
23934 // a * b + c
23935 
23936 instruct vfmaF_reg(vec a, vec b, vec c) %{
23937   match(Set c (FmaVF  c (Binary a b)));
23938   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23939   ins_cost(150);
23940   ins_encode %{
23941     assert(UseFMA, "not enabled");
23942     int vlen_enc = vector_length_encoding(this);
23943     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23944   %}
23945   ins_pipe( pipe_slow );
23946 %}
23947 
23948 instruct vfmaF_mem(vec a, memory b, vec c) %{
23949   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23950   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23951   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23952   ins_cost(150);
23953   ins_encode %{
23954     assert(UseFMA, "not enabled");
23955     int vlen_enc = vector_length_encoding(this);
23956     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23957   %}
23958   ins_pipe( pipe_slow );
23959 %}
23960 
23961 instruct vfmaD_reg(vec a, vec b, vec c) %{
23962   match(Set c (FmaVD  c (Binary a b)));
23963   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23964   ins_cost(150);
23965   ins_encode %{
23966     assert(UseFMA, "not enabled");
23967     int vlen_enc = vector_length_encoding(this);
23968     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23969   %}
23970   ins_pipe( pipe_slow );
23971 %}
23972 
23973 instruct vfmaD_mem(vec a, memory b, vec c) %{
23974   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23975   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23976   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23977   ins_cost(150);
23978   ins_encode %{
23979     assert(UseFMA, "not enabled");
23980     int vlen_enc = vector_length_encoding(this);
23981     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23982   %}
23983   ins_pipe( pipe_slow );
23984 %}
23985 
23986 // --------------------------------- Vector Multiply Add --------------------------------------
23987 
23988 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23989   predicate(UseAVX == 0);
23990   match(Set dst (MulAddVS2VI dst src1));
23991   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23992   ins_encode %{
23993     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23994   %}
23995   ins_pipe( pipe_slow );
23996 %}
23997 
23998 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23999   predicate(UseAVX > 0);
24000   match(Set dst (MulAddVS2VI src1 src2));
24001   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
24002   ins_encode %{
24003     int vlen_enc = vector_length_encoding(this);
24004     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24005   %}
24006   ins_pipe( pipe_slow );
24007 %}
24008 
24009 // --------------------------------- Vector Multiply Add Add ----------------------------------
24010 
24011 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
24012   predicate(VM_Version::supports_avx512_vnni());
24013   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
24014   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
24015   ins_encode %{
24016     assert(UseAVX > 2, "required");
24017     int vlen_enc = vector_length_encoding(this);
24018     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24019   %}
24020   ins_pipe( pipe_slow );
24021   ins_cost(10);
24022 %}
24023 
24024 // --------------------------------- PopCount --------------------------------------
24025 
24026 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
24027   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24028   match(Set dst (PopCountVI src));
24029   match(Set dst (PopCountVL src));
24030   format %{ "vector_popcount_integral $dst, $src" %}
24031   ins_encode %{
24032     int opcode = this->ideal_Opcode();
24033     int vlen_enc = vector_length_encoding(this, $src);
24034     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24035     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
24036   %}
24037   ins_pipe( pipe_slow );
24038 %}
24039 
24040 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
24041   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24042   match(Set dst (PopCountVI src mask));
24043   match(Set dst (PopCountVL src mask));
24044   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
24045   ins_encode %{
24046     int vlen_enc = vector_length_encoding(this, $src);
24047     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24048     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24049     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
24050   %}
24051   ins_pipe( pipe_slow );
24052 %}
24053 
24054 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
24055   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24056   match(Set dst (PopCountVI src));
24057   match(Set dst (PopCountVL src));
24058   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24059   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
24060   ins_encode %{
24061     int opcode = this->ideal_Opcode();
24062     int vlen_enc = vector_length_encoding(this, $src);
24063     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24064     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24065                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
24066   %}
24067   ins_pipe( pipe_slow );
24068 %}
24069 
24070 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24071 
24072 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
24073   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24074                                               Matcher::vector_length_in_bytes(n->in(1))));
24075   match(Set dst (CountTrailingZerosV src));
24076   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
24077   ins_cost(400);
24078   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
24079   ins_encode %{
24080     int vlen_enc = vector_length_encoding(this, $src);
24081     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24082     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24083                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24084   %}
24085   ins_pipe( pipe_slow );
24086 %}
24087 
24088 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24089   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24090             VM_Version::supports_avx512cd() &&
24091             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24092   match(Set dst (CountTrailingZerosV src));
24093   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24094   ins_cost(400);
24095   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24096   ins_encode %{
24097     int vlen_enc = vector_length_encoding(this, $src);
24098     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24099     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24100                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24101   %}
24102   ins_pipe( pipe_slow );
24103 %}
24104 
24105 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
24106   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24107   match(Set dst (CountTrailingZerosV src));
24108   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
24109   ins_cost(400);
24110   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
24111   ins_encode %{
24112     int vlen_enc = vector_length_encoding(this, $src);
24113     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24114     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24115                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
24116                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
24117   %}
24118   ins_pipe( pipe_slow );
24119 %}
24120 
24121 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24122   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24123   match(Set dst (CountTrailingZerosV src));
24124   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24125   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24126   ins_encode %{
24127     int vlen_enc = vector_length_encoding(this, $src);
24128     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24129     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24130                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24131   %}
24132   ins_pipe( pipe_slow );
24133 %}
24134 
24135 
24136 // --------------------------------- Bitwise Ternary Logic ----------------------------------
24137 
24138 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
24139   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
24140   effect(TEMP dst);
24141   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24142   ins_encode %{
24143     int vector_len = vector_length_encoding(this);
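    // $func is the 8-bit truth table applied bitwise to (dst, src2, src3).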
24144     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
24145   %}
24146   ins_pipe( pipe_slow );
24147 %}
24148 
24149 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
24150   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
24151   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
24152   effect(TEMP dst);
24153   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24154   ins_encode %{
24155     int vector_len = vector_length_encoding(this);
24156     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
24157   %}
24158   ins_pipe( pipe_slow );
24159 %}
24160 
24161 // --------------------------------- Rotation Operations ----------------------------------
24162 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
24163   match(Set dst (RotateLeftV src shift));
24164   match(Set dst (RotateRightV src shift));
24165   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
24166   ins_encode %{
24167     int opcode      = this->ideal_Opcode();
24168     int vector_len  = vector_length_encoding(this);
24169     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24170     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
24171   %}
24172   ins_pipe( pipe_slow );
24173 %}
24174 
24175 instruct vprorate(vec dst, vec src, vec shift) %{
24176   match(Set dst (RotateLeftV src shift));
24177   match(Set dst (RotateRightV src shift));
24178   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
24179   ins_encode %{
24180     int opcode      = this->ideal_Opcode();
24181     int vector_len  = vector_length_encoding(this);
24182     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24183     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
24184   %}
24185   ins_pipe( pipe_slow );
24186 %}
24187 
24188 // ---------------------------------- Masked Operations ------------------------------------
24189 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24190   predicate(!n->in(3)->bottom_type()->isa_vectmask());
24191   match(Set dst (LoadVectorMasked mem mask));
24192   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24193   ins_encode %{
24194     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24195     int vlen_enc = vector_length_encoding(this);
24196     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24197   %}
24198   ins_pipe( pipe_slow );
24199 %}
24200 
24201 
24202 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24203   predicate(n->in(3)->bottom_type()->isa_vectmask());
24204   match(Set dst (LoadVectorMasked mem mask));
24205   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24206   ins_encode %{
24207     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
24208     int vector_len = vector_length_encoding(this);
24209     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24210   %}
24211   ins_pipe( pipe_slow );
24212 %}
24213 
24214 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24215   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24216   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24217   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24218   ins_encode %{
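    // This node produces memory, so take the element type and vector length
    // from the stored-value input.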
24219     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24220     int vlen_enc = vector_length_encoding(src_node);
24221     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24222     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24223   %}
24224   ins_pipe( pipe_slow );
24225 %}
24226 
24227 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24228   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24229   match(Set mem (StoreVectorMasked mem (Binary src mask)));
24230   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24231   ins_encode %{
24232     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24233     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
24234     int vlen_enc = vector_length_encoding(src_node);
24235     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24236   %}
24237   ins_pipe( pipe_slow );
24238 %}
24239 
24240 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24241   match(Set addr (VerifyVectorAlignment addr mask));
24242   effect(KILL cr);
24243   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24244   ins_encode %{
24245     Label Lskip;
24246     // check if masked bits of addr are zero
24247     __ testq($addr$$Register, $mask$$constant);
24248     __ jccb(Assembler::equal, Lskip);
24249     __ stop("verify_vector_alignment found a misaligned vector memory access");
24250     __ bind(Lskip);
24251   %}
24252   ins_pipe(pipe_slow);
24253 %}
24254 
24255 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24256   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24257   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24258   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24259   ins_encode %{
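    // Result is -1 when every masked lane of $src1 equals $src2; otherwise it
    // is the index of the first mismatching lane, found with tzcnt on the
    // inverted equality mask.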
24260     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24261     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
24262 
24263     Label DONE;
24264     int vlen_enc = vector_length_encoding(this, $src1);
24265     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24266 
24267     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24268     __ mov64($dst$$Register, -1L);
24269     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24270     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24271     __ jccb(Assembler::carrySet, DONE);
24272     __ kmovql($dst$$Register, $ktmp1$$KRegister);
24273     __ notq($dst$$Register);
24274     __ tzcntq($dst$$Register, $dst$$Register);
24275     __ bind(DONE);
24276   %}
24277   ins_pipe( pipe_slow );
24278 %}
24279 
24280 
24281 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24282   match(Set dst (VectorMaskGen len));
24283   effect(TEMP temp, KILL cr);
24284   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24285   ins_encode %{
24286     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24287   %}
24288   ins_pipe( pipe_slow );
24289 %}
24290 
24291 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24292   match(Set dst (VectorMaskGen len));
24293   format %{ "vector_mask_gen $len \t! vector mask generator" %}
24294   effect(TEMP temp);
24295   ins_encode %{
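    // Materialize a k-mask with the low $len bits set; a zero length yields
    // an all-zero mask.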
24296     if ($len$$constant > 0) {
24297       __ mov64($temp$$Register, right_n_bits($len$$constant));
24298       __ kmovql($dst$$KRegister, $temp$$Register);
24299     } else {
24300       __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
24301     }
24302   %}
24303   ins_pipe( pipe_slow );
24304 %}
24305 
24306 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24307   predicate(n->in(1)->bottom_type()->isa_vectmask());
24308   match(Set dst (VectorMaskToLong mask));
24309   effect(TEMP dst, KILL cr);
24310   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24311   ins_encode %{
24312     int opcode = this->ideal_Opcode();
24313     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24314     int mask_len = Matcher::vector_length(this, $mask);
24315     int mask_size = mask_len * type2aelembytes(mbt);
24316     int vlen_enc = vector_length_encoding(this, $mask);
24317     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24318                              $dst$$Register, mask_len, mask_size, vlen_enc);
24319   %}
24320   ins_pipe( pipe_slow );
24321 %}
24322 
24323 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24324   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24325   match(Set dst (VectorMaskToLong mask));
24326   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24327   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24328   ins_encode %{
24329     int opcode = this->ideal_Opcode();
24330     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24331     int mask_len = Matcher::vector_length(this, $mask);
24332     int vlen_enc = vector_length_encoding(this, $mask);
24333     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24334                              $dst$$Register, mask_len, mbt, vlen_enc);
24335   %}
24336   ins_pipe( pipe_slow );
24337 %}
24338 
24339 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24340   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24341   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24342   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24343   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24344   ins_encode %{
24345     int opcode = this->ideal_Opcode();
24346     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24347     int mask_len = Matcher::vector_length(this, $mask);
24348     int vlen_enc = vector_length_encoding(this, $mask);
24349     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24350                              $dst$$Register, mask_len, mbt, vlen_enc);
24351   %}
24352   ins_pipe( pipe_slow );
24353 %}
24354 
24355 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24356   predicate(n->in(1)->bottom_type()->isa_vectmask());
24357   match(Set dst (VectorMaskTrueCount mask));
24358   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24359   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24360   ins_encode %{
24361     int opcode = this->ideal_Opcode();
24362     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24363     int mask_len = Matcher::vector_length(this, $mask);
24364     int mask_size = mask_len * type2aelembytes(mbt);
24365     int vlen_enc = vector_length_encoding(this, $mask);
24366     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24367                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24368   %}
24369   ins_pipe( pipe_slow );
24370 %}
24371 
24372 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24373   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24374   match(Set dst (VectorMaskTrueCount mask));
24375   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24376   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24377   ins_encode %{
24378     int opcode = this->ideal_Opcode();
24379     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24380     int mask_len = Matcher::vector_length(this, $mask);
24381     int vlen_enc = vector_length_encoding(this, $mask);
24382     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24383                              $tmp$$Register, mask_len, mbt, vlen_enc);
24384   %}
24385   ins_pipe( pipe_slow );
24386 %}
24387 
24388 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24389   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24390   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24391   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24392   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24393   ins_encode %{
24394     int opcode = this->ideal_Opcode();
24395     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24396     int mask_len = Matcher::vector_length(this, $mask);
24397     int vlen_enc = vector_length_encoding(this, $mask);
24398     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24399                              $tmp$$Register, mask_len, mbt, vlen_enc);
24400   %}
24401   ins_pipe( pipe_slow );
24402 %}
24403 
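// VectorMaskFirstTrue/VectorMaskLastTrue use the same three-way split as the
// true-count rules above (EVEX opmask, boolean vector, VectorStoreMask-fed AVX)
// and likewise funnel into vector_mask_operation.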
24404 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24405   predicate(n->in(1)->bottom_type()->isa_vectmask());
24406   match(Set dst (VectorMaskFirstTrue mask));
24407   match(Set dst (VectorMaskLastTrue mask));
24408   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24409   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24410   ins_encode %{
24411     int opcode = this->ideal_Opcode();
24412     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24413     int mask_len = Matcher::vector_length(this, $mask);
24414     int mask_size = mask_len * type2aelembytes(mbt);
24415     int vlen_enc = vector_length_encoding(this, $mask);
24416     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24417                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24418   %}
24419   ins_pipe( pipe_slow );
24420 %}
24421 
24422 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24423   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24424   match(Set dst (VectorMaskFirstTrue mask));
24425   match(Set dst (VectorMaskLastTrue mask));
24426   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24427   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24428   ins_encode %{
24429     int opcode = this->ideal_Opcode();
24430     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24431     int mask_len = Matcher::vector_length(this, $mask);
24432     int vlen_enc = vector_length_encoding(this, $mask);
24433     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24434                              $tmp$$Register, mask_len, mbt, vlen_enc);
24435   %}
24436   ins_pipe( pipe_slow );
24437 %}
24438 
24439 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24440   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24441   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24442   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24443   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24444   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24445   ins_encode %{
24446     int opcode = this->ideal_Opcode();
24447     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24448     int mask_len = Matcher::vector_length(this, $mask);
24449     int vlen_enc = vector_length_encoding(this, $mask);
24450     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24451                              $tmp$$Register, mask_len, mbt, vlen_enc);
24452   %}
24453   ins_pipe( pipe_slow );
24454 %}
24455 
24456 // --------------------------------- Compress/Expand Operations ---------------------------
24457 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24458   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24459   match(Set dst (CompressV src mask));
24460   match(Set dst (ExpandV src mask));
24461   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24462   format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24463   ins_encode %{
24464     int opcode = this->ideal_Opcode();
24465     int vlen_enc = vector_length_encoding(this);
24466     BasicType bt  = Matcher::vector_element_basic_type(this);
24467     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24468                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24469   %}
24470   ins_pipe( pipe_slow );
24471 %}
24472 
24473 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24474   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24475   match(Set dst (CompressV src mask));
24476   match(Set dst (ExpandV src mask));
24477   format %{ "vector_compress_expand $dst, $src, $mask" %}
24478   ins_encode %{
24479     int opcode = this->ideal_Opcode();
24480     int vector_len = vector_length_encoding(this);
24481     BasicType bt  = Matcher::vector_element_basic_type(this);
24482     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24483   %}
24484   ins_pipe( pipe_slow );
24485 %}
24486 
24487 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24488   match(Set dst (CompressM mask));
24489   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24490   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24491   ins_encode %{
24492     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24493     int mask_len = Matcher::vector_length(this);
24494     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24495   %}
24496   ins_pipe( pipe_slow );
24497 %}
24498 
24499 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24500 
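// ReverseV (bit reversal) has a GFNI variant, which applies the
// 0x8040201008040201 bit-matrix constant, and a generic variant that needs two
// vector temporaries plus a GPR.  ReverseBytesV is handled directly except for
// 64-byte vectors on targets without AVX512BW, which get a dedicated rule.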
24501 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24502   predicate(!VM_Version::supports_gfni());
24503   match(Set dst (ReverseV src));
24504   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24505   format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24506   ins_encode %{
24507     int vec_enc = vector_length_encoding(this);
24508     BasicType bt = Matcher::vector_element_basic_type(this);
24509     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24510                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24511   %}
24512   ins_pipe( pipe_slow );
24513 %}
24514 
24515 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24516   predicate(VM_Version::supports_gfni());
24517   match(Set dst (ReverseV src));
24518   effect(TEMP dst, TEMP xtmp);
24519   format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
24520   ins_encode %{
24521     int vec_enc = vector_length_encoding(this);
24522     BasicType bt  = Matcher::vector_element_basic_type(this);
24523     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24524     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24525                                $xtmp$$XMMRegister);
24526   %}
24527   ins_pipe( pipe_slow );
24528 %}
24529 
24530 instruct vreverse_byte_reg(vec dst, vec src) %{
24531   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24532   match(Set dst (ReverseBytesV src));
24533   effect(TEMP dst);
24534   format %{ "vector_reverse_byte $dst, $src" %}
24535   ins_encode %{
24536     int vec_enc = vector_length_encoding(this);
24537     BasicType bt = Matcher::vector_element_basic_type(this);
24538     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24539   %}
24540   ins_pipe( pipe_slow );
24541 %}
24542 
24543 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24544   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24545   match(Set dst (ReverseBytesV src));
24546   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24547   format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24548   ins_encode %{
24549     int vec_enc = vector_length_encoding(this);
24550     BasicType bt = Matcher::vector_element_basic_type(this);
24551     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24552                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24553   %}
24554   ins_pipe( pipe_slow );
24555 %}
24556 
24557 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24558 
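// CountLeadingZerosV is split by element type and feature set: EVEX rules
// (including a masked form) for int/long when is_clz_non_subword_predicate_evex
// holds, dedicated EVEX rules for short and byte elements, and AVX fallbacks
// for sub-64-byte vectors on targets without AVX512VL.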
24559 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24560   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24561                                               Matcher::vector_length_in_bytes(n->in(1))));
24562   match(Set dst (CountLeadingZerosV src));
24563   format %{ "vector_count_leading_zeros $dst, $src" %}
24564   ins_encode %{
24565      int vlen_enc = vector_length_encoding(this, $src);
24566      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24567      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24568                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24569   %}
24570   ins_pipe( pipe_slow );
24571 %}
24572 
24573 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24574   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24575                                               Matcher::vector_length_in_bytes(n->in(1))));
24576   match(Set dst (CountLeadingZerosV src mask));
24577   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24578   ins_encode %{
24579     int vlen_enc = vector_length_encoding(this, $src);
24580     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24581     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24582     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24583                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24584   %}
24585   ins_pipe( pipe_slow );
24586 %}
24587 
24588 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24589   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24590             VM_Version::supports_avx512cd() &&
24591             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24592   match(Set dst (CountLeadingZerosV src));
24593   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24594   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24595   ins_encode %{
24596     int vlen_enc = vector_length_encoding(this, $src);
24597     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24598     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24599                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24600   %}
24601   ins_pipe( pipe_slow );
24602 %}
24603 
24604 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24605   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24606   match(Set dst (CountLeadingZerosV src));
24607   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24608   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24609   ins_encode %{
24610     int vlen_enc = vector_length_encoding(this, $src);
24611     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24612     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24613                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24614                                        $rtmp$$Register, true, vlen_enc);
24615   %}
24616   ins_pipe( pipe_slow );
24617 %}
24618 
24619 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24620   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24621             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24622   match(Set dst (CountLeadingZerosV src));
24623   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24624   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24625   ins_encode %{
24626     int vlen_enc = vector_length_encoding(this, $src);
24627     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24628     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24629                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24630   %}
24631   ins_pipe( pipe_slow );
24632 %}
24633 
24634 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24635   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24636             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24637   match(Set dst (CountLeadingZerosV src));
24638   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24639   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24640   ins_encode %{
24641     int vlen_enc = vector_length_encoding(this, $src);
24642     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24643     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24644                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24645   %}
24646   ins_pipe( pipe_slow );
24647 %}
24648 
24649 // ---------------------------------- Vector Masked Operations ------------------------------------
24650 
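// Each rule below pairs a vector arithmetic, logic or shift node with an
// AVX-512 opmask (kReg) input and lowers it through evmasked_op (or a dedicated
// masked helper).  Register and memory variants of the second operand are kept
// side by side.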
24651 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24652   match(Set dst (AddVB (Binary dst src2) mask));
24653   match(Set dst (AddVS (Binary dst src2) mask));
24654   match(Set dst (AddVI (Binary dst src2) mask));
24655   match(Set dst (AddVL (Binary dst src2) mask));
24656   match(Set dst (AddVF (Binary dst src2) mask));
24657   match(Set dst (AddVD (Binary dst src2) mask));
24658   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24659   ins_encode %{
24660     int vlen_enc = vector_length_encoding(this);
24661     BasicType bt = Matcher::vector_element_basic_type(this);
24662     int opc = this->ideal_Opcode();
24663     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24664                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24665   %}
24666   ins_pipe( pipe_slow );
24667 %}
24668 
24669 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24670   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24671   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24672   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24673   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24674   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24675   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24676   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24677   ins_encode %{
24678     int vlen_enc = vector_length_encoding(this);
24679     BasicType bt = Matcher::vector_element_basic_type(this);
24680     int opc = this->ideal_Opcode();
24681     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24682                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24683   %}
24684   ins_pipe( pipe_slow );
24685 %}
24686 
24687 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24688   match(Set dst (XorV (Binary dst src2) mask));
24689   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24690   ins_encode %{
24691     int vlen_enc = vector_length_encoding(this);
24692     BasicType bt = Matcher::vector_element_basic_type(this);
24693     int opc = this->ideal_Opcode();
24694     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24695                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24696   %}
24697   ins_pipe( pipe_slow );
24698 %}
24699 
24700 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24701   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24702   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24703   ins_encode %{
24704     int vlen_enc = vector_length_encoding(this);
24705     BasicType bt = Matcher::vector_element_basic_type(this);
24706     int opc = this->ideal_Opcode();
24707     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24708                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24709   %}
24710   ins_pipe( pipe_slow );
24711 %}
24712 
24713 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24714   match(Set dst (OrV (Binary dst src2) mask));
24715   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24716   ins_encode %{
24717     int vlen_enc = vector_length_encoding(this);
24718     BasicType bt = Matcher::vector_element_basic_type(this);
24719     int opc = this->ideal_Opcode();
24720     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24721                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24722   %}
24723   ins_pipe( pipe_slow );
24724 %}
24725 
24726 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24727   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24728   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24729   ins_encode %{
24730     int vlen_enc = vector_length_encoding(this);
24731     BasicType bt = Matcher::vector_element_basic_type(this);
24732     int opc = this->ideal_Opcode();
24733     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24734                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24735   %}
24736   ins_pipe( pipe_slow );
24737 %}
24738 
24739 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24740   match(Set dst (AndV (Binary dst src2) mask));
24741   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24742   ins_encode %{
24743     int vlen_enc = vector_length_encoding(this);
24744     BasicType bt = Matcher::vector_element_basic_type(this);
24745     int opc = this->ideal_Opcode();
24746     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24747                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24748   %}
24749   ins_pipe( pipe_slow );
24750 %}
24751 
24752 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24753   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24754   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24755   ins_encode %{
24756     int vlen_enc = vector_length_encoding(this);
24757     BasicType bt = Matcher::vector_element_basic_type(this);
24758     int opc = this->ideal_Opcode();
24759     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24760                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24761   %}
24762   ins_pipe( pipe_slow );
24763 %}
24764 
24765 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24766   match(Set dst (SubVB (Binary dst src2) mask));
24767   match(Set dst (SubVS (Binary dst src2) mask));
24768   match(Set dst (SubVI (Binary dst src2) mask));
24769   match(Set dst (SubVL (Binary dst src2) mask));
24770   match(Set dst (SubVF (Binary dst src2) mask));
24771   match(Set dst (SubVD (Binary dst src2) mask));
24772   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24773   ins_encode %{
24774     int vlen_enc = vector_length_encoding(this);
24775     BasicType bt = Matcher::vector_element_basic_type(this);
24776     int opc = this->ideal_Opcode();
24777     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24778                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24779   %}
24780   ins_pipe( pipe_slow );
24781 %}
24782 
24783 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24784   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24785   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24786   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24787   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24788   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24789   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24790   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24791   ins_encode %{
24792     int vlen_enc = vector_length_encoding(this);
24793     BasicType bt = Matcher::vector_element_basic_type(this);
24794     int opc = this->ideal_Opcode();
24795     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24796                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24797   %}
24798   ins_pipe( pipe_slow );
24799 %}
24800 
24801 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24802   match(Set dst (MulVS (Binary dst src2) mask));
24803   match(Set dst (MulVI (Binary dst src2) mask));
24804   match(Set dst (MulVL (Binary dst src2) mask));
24805   match(Set dst (MulVF (Binary dst src2) mask));
24806   match(Set dst (MulVD (Binary dst src2) mask));
24807   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24808   ins_encode %{
24809     int vlen_enc = vector_length_encoding(this);
24810     BasicType bt = Matcher::vector_element_basic_type(this);
24811     int opc = this->ideal_Opcode();
24812     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24813                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24814   %}
24815   ins_pipe( pipe_slow );
24816 %}
24817 
24818 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24819   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24820   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24821   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24822   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24823   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24824   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24825   ins_encode %{
24826     int vlen_enc = vector_length_encoding(this);
24827     BasicType bt = Matcher::vector_element_basic_type(this);
24828     int opc = this->ideal_Opcode();
24829     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24830                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24831   %}
24832   ins_pipe( pipe_slow );
24833 %}
24834 
24835 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24836   match(Set dst (SqrtVF dst mask));
24837   match(Set dst (SqrtVD dst mask));
24838   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24839   ins_encode %{
24840     int vlen_enc = vector_length_encoding(this);
24841     BasicType bt = Matcher::vector_element_basic_type(this);
24842     int opc = this->ideal_Opcode();
24843     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24844                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24845   %}
24846   ins_pipe( pipe_slow );
24847 %}
24848 
24849 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24850   match(Set dst (DivVF (Binary dst src2) mask));
24851   match(Set dst (DivVD (Binary dst src2) mask));
24852   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24853   ins_encode %{
24854     int vlen_enc = vector_length_encoding(this);
24855     BasicType bt = Matcher::vector_element_basic_type(this);
24856     int opc = this->ideal_Opcode();
24857     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24858                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24859   %}
24860   ins_pipe( pipe_slow );
24861 %}
24862 
24863 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24864   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24865   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24866   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24867   ins_encode %{
24868     int vlen_enc = vector_length_encoding(this);
24869     BasicType bt = Matcher::vector_element_basic_type(this);
24870     int opc = this->ideal_Opcode();
24871     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24872                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24873   %}
24874   ins_pipe( pipe_slow );
24875 %}
24876 
24877 
24878 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24879   match(Set dst (RotateLeftV (Binary dst shift) mask));
24880   match(Set dst (RotateRightV (Binary dst shift) mask));
24881   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24882   ins_encode %{
24883     int vlen_enc = vector_length_encoding(this);
24884     BasicType bt = Matcher::vector_element_basic_type(this);
24885     int opc = this->ideal_Opcode();
24886     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24887                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24888   %}
24889   ins_pipe( pipe_slow );
24890 %}
24891 
24892 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24893   match(Set dst (RotateLeftV (Binary dst src2) mask));
24894   match(Set dst (RotateRightV (Binary dst src2) mask));
24895   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24896   ins_encode %{
24897     int vlen_enc = vector_length_encoding(this);
24898     BasicType bt = Matcher::vector_element_basic_type(this);
24899     int opc = this->ideal_Opcode();
24900     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24901                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24902   %}
24903   ins_pipe( pipe_slow );
24904 %}
24905 
24906 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24907   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24908   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24909   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24910   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24911   ins_encode %{
24912     int vlen_enc = vector_length_encoding(this);
24913     BasicType bt = Matcher::vector_element_basic_type(this);
24914     int opc = this->ideal_Opcode();
24915     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24916                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24917   %}
24918   ins_pipe( pipe_slow );
24919 %}
24920 
24921 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24922   predicate(!n->as_ShiftV()->is_var_shift());
24923   match(Set dst (LShiftVS (Binary dst src2) mask));
24924   match(Set dst (LShiftVI (Binary dst src2) mask));
24925   match(Set dst (LShiftVL (Binary dst src2) mask));
24926   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24927   ins_encode %{
24928     int vlen_enc = vector_length_encoding(this);
24929     BasicType bt = Matcher::vector_element_basic_type(this);
24930     int opc = this->ideal_Opcode();
24931     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24932                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24933   %}
24934   ins_pipe( pipe_slow );
24935 %}
24936 
24937 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24938   predicate(n->as_ShiftV()->is_var_shift());
24939   match(Set dst (LShiftVS (Binary dst src2) mask));
24940   match(Set dst (LShiftVI (Binary dst src2) mask));
24941   match(Set dst (LShiftVL (Binary dst src2) mask));
24942   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24943   ins_encode %{
24944     int vlen_enc = vector_length_encoding(this);
24945     BasicType bt = Matcher::vector_element_basic_type(this);
24946     int opc = this->ideal_Opcode();
24947     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24948                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24949   %}
24950   ins_pipe( pipe_slow );
24951 %}
24952 
24953 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24954   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24955   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24956   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24957   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24958   ins_encode %{
24959     int vlen_enc = vector_length_encoding(this);
24960     BasicType bt = Matcher::vector_element_basic_type(this);
24961     int opc = this->ideal_Opcode();
24962     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24963                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24964   %}
24965   ins_pipe( pipe_slow );
24966 %}
24967 
24968 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24969   predicate(!n->as_ShiftV()->is_var_shift());
24970   match(Set dst (RShiftVS (Binary dst src2) mask));
24971   match(Set dst (RShiftVI (Binary dst src2) mask));
24972   match(Set dst (RShiftVL (Binary dst src2) mask));
24973   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24974   ins_encode %{
24975     int vlen_enc = vector_length_encoding(this);
24976     BasicType bt = Matcher::vector_element_basic_type(this);
24977     int opc = this->ideal_Opcode();
24978     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24979                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24980   %}
24981   ins_pipe( pipe_slow );
24982 %}
24983 
24984 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24985   predicate(n->as_ShiftV()->is_var_shift());
24986   match(Set dst (RShiftVS (Binary dst src2) mask));
24987   match(Set dst (RShiftVI (Binary dst src2) mask));
24988   match(Set dst (RShiftVL (Binary dst src2) mask));
24989   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24990   ins_encode %{
24991     int vlen_enc = vector_length_encoding(this);
24992     BasicType bt = Matcher::vector_element_basic_type(this);
24993     int opc = this->ideal_Opcode();
24994     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24995                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24996   %}
24997   ins_pipe( pipe_slow );
24998 %}
24999 
25000 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
25001   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
25002   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
25003   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
25004   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
25005   ins_encode %{
25006     int vlen_enc = vector_length_encoding(this);
25007     BasicType bt = Matcher::vector_element_basic_type(this);
25008     int opc = this->ideal_Opcode();
25009     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25010                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
25011   %}
25012   ins_pipe( pipe_slow );
25013 %}
25014 
25015 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
25016   predicate(!n->as_ShiftV()->is_var_shift());
25017   match(Set dst (URShiftVS (Binary dst src2) mask));
25018   match(Set dst (URShiftVI (Binary dst src2) mask));
25019   match(Set dst (URShiftVL (Binary dst src2) mask));
25020   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25021   ins_encode %{
25022     int vlen_enc = vector_length_encoding(this);
25023     BasicType bt = Matcher::vector_element_basic_type(this);
25024     int opc = this->ideal_Opcode();
25025     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25026                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
25027   %}
25028   ins_pipe( pipe_slow );
25029 %}
25030 
25031 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
25032   predicate(n->as_ShiftV()->is_var_shift());
25033   match(Set dst (URShiftVS (Binary dst src2) mask));
25034   match(Set dst (URShiftVI (Binary dst src2) mask));
25035   match(Set dst (URShiftVL (Binary dst src2) mask));
25036   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25037   ins_encode %{
25038     int vlen_enc = vector_length_encoding(this);
25039     BasicType bt = Matcher::vector_element_basic_type(this);
25040     int opc = this->ideal_Opcode();
25041     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25042                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
25043   %}
25044   ins_pipe( pipe_slow );
25045 %}
25046 
25047 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
25048   match(Set dst (MaxV (Binary dst src2) mask));
25049   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25050   ins_encode %{
25051     int vlen_enc = vector_length_encoding(this);
25052     BasicType bt = Matcher::vector_element_basic_type(this);
25053     int opc = this->ideal_Opcode();
25054     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25055                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25056   %}
25057   ins_pipe( pipe_slow );
25058 %}
25059 
25060 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
25061   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
25062   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25063   ins_encode %{
25064     int vlen_enc = vector_length_encoding(this);
25065     BasicType bt = Matcher::vector_element_basic_type(this);
25066     int opc = this->ideal_Opcode();
25067     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25068                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25069   %}
25070   ins_pipe( pipe_slow );
25071 %}
25072 
25073 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
25074   match(Set dst (MinV (Binary dst src2) mask));
25075   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25076   ins_encode %{
25077     int vlen_enc = vector_length_encoding(this);
25078     BasicType bt = Matcher::vector_element_basic_type(this);
25079     int opc = this->ideal_Opcode();
25080     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25081                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25082   %}
25083   ins_pipe( pipe_slow );
25084 %}
25085 
25086 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
25087   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
25088   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25089   ins_encode %{
25090     int vlen_enc = vector_length_encoding(this);
25091     BasicType bt = Matcher::vector_element_basic_type(this);
25092     int opc = this->ideal_Opcode();
25093     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25094                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25095   %}
25096   ins_pipe( pipe_slow );
25097 %}
25098 
25099 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
25100   match(Set dst (VectorRearrange (Binary dst src2) mask));
25101   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
25102   ins_encode %{
25103     int vlen_enc = vector_length_encoding(this);
25104     BasicType bt = Matcher::vector_element_basic_type(this);
25105     int opc = this->ideal_Opcode();
25106     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25107                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25108   %}
25109   ins_pipe( pipe_slow );
25110 %}
25111 
25112 instruct vabs_masked(vec dst, kReg mask) %{
25113   match(Set dst (AbsVB dst mask));
25114   match(Set dst (AbsVS dst mask));
25115   match(Set dst (AbsVI dst mask));
25116   match(Set dst (AbsVL dst mask));
25117   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
25118   ins_encode %{
25119     int vlen_enc = vector_length_encoding(this);
25120     BasicType bt = Matcher::vector_element_basic_type(this);
25121     int opc = this->ideal_Opcode();
25122     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25123                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
25124   %}
25125   ins_pipe( pipe_slow );
25126 %}
25127 
25128 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
25129   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
25130   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
25131   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25132   ins_encode %{
25133     assert(UseFMA, "Needs FMA instructions support.");
25134     int vlen_enc = vector_length_encoding(this);
25135     BasicType bt = Matcher::vector_element_basic_type(this);
25136     int opc = this->ideal_Opcode();
25137     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25138                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
25139   %}
25140   ins_pipe( pipe_slow );
25141 %}
25142 
25143 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
25144   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
25145   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
25146   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25147   ins_encode %{
25148     assert(UseFMA, "Needs FMA instructions support.");
25149     int vlen_enc = vector_length_encoding(this);
25150     BasicType bt = Matcher::vector_element_basic_type(this);
25151     int opc = this->ideal_Opcode();
25152     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25153                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
25154   %}
25155   ins_pipe( pipe_slow );
25156 %}
25157 
25158 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
25159   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
25160   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
25161   ins_encode %{
25162     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
25163     int vlen_enc = vector_length_encoding(this, $src1);
25164     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
25165 
    // Dispatch the masked compare on the element type of the first vector input.
25167     switch (src1_elem_bt) {
25168       case T_BYTE: {
25169         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25170         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25171         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25172         break;
25173       }
25174       case T_SHORT: {
25175         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25176         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25177         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25178         break;
25179       }
25180       case T_INT: {
25181         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25182         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25183         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25184         break;
25185       }
25186       case T_LONG: {
25187         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25188         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25189         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25190         break;
25191       }
25192       case T_FLOAT: {
25193         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25194         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25195         break;
25196       }
25197       case T_DOUBLE: {
25198         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25199         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25200         break;
25201       }
25202       default: assert(false, "%s", type2name(src1_elem_bt)); break;
25203     }
25204   %}
25205   ins_pipe( pipe_slow );
25206 %}
25207 
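// Opmask utility rules: MaskAll broadcasts a scalar condition into an opmask,
// mask negation is matched as XorVMask with MaskAll(-1) and emitted via knot,
// VectorLongToMask converts a long to either a boolean vector or an opmask,
// and AndVMask/OrVMask/XorVMask combine opmasks directly.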
25208 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25209   predicate(Matcher::vector_length(n) <= 32);
25210   match(Set dst (MaskAll src));
25211   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
25212   ins_encode %{
25213     int mask_len = Matcher::vector_length(this);
25214     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25215   %}
25216   ins_pipe( pipe_slow );
25217 %}
25218 
25219 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25220   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25221   match(Set dst (XorVMask src (MaskAll cnt)));
25222   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25223   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
25224   ins_encode %{
25225     uint masklen = Matcher::vector_length(this);
25226     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25227   %}
25228   ins_pipe( pipe_slow );
25229 %}
25230 
25231 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25232   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25233             (Matcher::vector_length(n) == 16) ||
25234             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25235   match(Set dst (XorVMask src (MaskAll cnt)));
25236   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25237   ins_encode %{
25238     uint masklen = Matcher::vector_length(this);
25239     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25240   %}
25241   ins_pipe( pipe_slow );
25242 %}
25243 
25244 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
25245   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25246   match(Set dst (VectorLongToMask src));
25247   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
25248   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
25249   ins_encode %{
25250     int mask_len = Matcher::vector_length(this);
25251     int vec_enc  = vector_length_encoding(mask_len);
25252     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25253                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25254   %}
25255   ins_pipe( pipe_slow );
25256 %}
25257 
25258 
25259 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25260   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25261   match(Set dst (VectorLongToMask src));
25262   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
25263   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
25264   ins_encode %{
25265     int mask_len = Matcher::vector_length(this);
25266     assert(mask_len <= 32, "invalid mask length");
25267     int vec_enc  = vector_length_encoding(mask_len);
25268     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25269                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25270   %}
25271   ins_pipe( pipe_slow );
25272 %}
25273 
25274 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25275   predicate(n->bottom_type()->isa_vectmask());
25276   match(Set dst (VectorLongToMask src));
25277   format %{ "long_to_mask_evex $dst, $src\t!" %}
25278   ins_encode %{
25279     __ kmov($dst$$KRegister, $src$$Register);
25280   %}
25281   ins_pipe( pipe_slow );
25282 %}
25283 
25284 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25285   match(Set dst (AndVMask src1 src2));
25286   match(Set dst (OrVMask src1 src2));
25287   match(Set dst (XorVMask src1 src2));
25288   effect(TEMP kscratch);
25289   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25290   ins_encode %{
25291     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25292     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25293     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25294     uint masklen = Matcher::vector_length(this);
25295     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25296     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25297   %}
25298   ins_pipe( pipe_slow );
25299 %}
25300 
25301 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25302   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25303   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25304   ins_encode %{
25305     int vlen_enc = vector_length_encoding(this);
25306     BasicType bt = Matcher::vector_element_basic_type(this);
25307     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25308                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25309   %}
25310   ins_pipe( pipe_slow );
25311 %}
25312 
25313 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25314   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25315   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25316   ins_encode %{
25317     int vlen_enc = vector_length_encoding(this);
25318     BasicType bt = Matcher::vector_element_basic_type(this);
25319     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25320                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25321   %}
25322   ins_pipe( pipe_slow );
25323 %}
25324 
25325 instruct castMM(kReg dst)
25326 %{
25327   match(Set dst (CastVV dst));
25328 
25329   size(0);
25330   format %{ "# castVV of $dst" %}
25331   ins_encode(/* empty encoding */);
25332   ins_cost(0);
25333   ins_pipe(empty);
25334 %}
25335 
25336 instruct castVV(vec dst)
25337 %{
25338   match(Set dst (CastVV dst));
25339 
25340   size(0);
25341   format %{ "# castVV of $dst" %}
25342   ins_encode(/* empty encoding */);
25343   ins_cost(0);
25344   ins_pipe(empty);
25345 %}
25346 
25347 instruct castVVLeg(legVec dst)
25348 %{
25349   match(Set dst (CastVV dst));
25350 
25351   size(0);
25352   format %{ "# castVV of $dst" %}
25353   ins_encode(/* empty encoding */);
25354   ins_cost(0);
25355   ins_pipe(empty);
25356 %}
25357 
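// IsInfiniteF/IsInfiniteD test the floating-point value class with
// vfpclassss/vfpclasssd; the 0x18 immediate selects the positive- and
// negative-infinity classes, and the resulting opmask bit is copied to a GPR.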
25358 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25359 %{
25360   match(Set dst (IsInfiniteF src));
25361   effect(TEMP ktmp, KILL cr);
25362   format %{ "float_class_check $dst, $src" %}
25363   ins_encode %{
25364     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25365     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25366   %}
25367   ins_pipe(pipe_slow);
25368 %}
25369 
25370 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25371 %{
25372   match(Set dst (IsInfiniteD src));
25373   effect(TEMP ktmp, KILL cr);
25374   format %{ "double_class_check $dst, $src" %}
25375   ins_encode %{
25376     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25377     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25378   %}
25379   ins_pipe(pipe_slow);
25380 %}
25381 
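// Saturating vector add/subtract: subword element types (byte/short) map onto
// single saturating instructions through vector_saturating_op, while int/long
// elements need multi-instruction sequences with separate EVEX (opmask-based)
// and AVX (vector-temporary) variants for the signed and unsigned cases.
// Masked and memory-operand rules follow the same split.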
25382 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25383 %{
25384   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25385             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25386   match(Set dst (SaturatingAddV src1 src2));
25387   match(Set dst (SaturatingSubV src1 src2));
25388   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25389   ins_encode %{
25390     int vlen_enc = vector_length_encoding(this);
25391     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25392     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25393                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25394   %}
25395   ins_pipe(pipe_slow);
25396 %}
25397 
25398 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25399 %{
25400   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25401             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25402   match(Set dst (SaturatingAddV src1 src2));
25403   match(Set dst (SaturatingSubV src1 src2));
25404   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25405   ins_encode %{
25406     int vlen_enc = vector_length_encoding(this);
25407     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25408     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25409                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25410   %}
25411   ins_pipe(pipe_slow);
25412 %}
25413 
25414 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25415 %{
25416   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25417             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25418             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25419   match(Set dst (SaturatingAddV src1 src2));
25420   match(Set dst (SaturatingSubV src1 src2));
25421   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25422   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25423   ins_encode %{
25424     int vlen_enc = vector_length_encoding(this);
25425     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25426     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25427                                         $src1$$XMMRegister, $src2$$XMMRegister,
25428                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25429                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25430   %}
25431   ins_pipe(pipe_slow);
25432 %}
25433 
25434 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25435 %{
25436   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25437             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25438             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25439   match(Set dst (SaturatingAddV src1 src2));
25440   match(Set dst (SaturatingSubV src1 src2));
25441   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25442   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25443   ins_encode %{
25444     int vlen_enc = vector_length_encoding(this);
25445     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25446     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25447                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25448                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25449   %}
25450   ins_pipe(pipe_slow);
25451 %}
25452 
25453 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25454 %{
25455   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25456             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25457             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25458   match(Set dst (SaturatingAddV src1 src2));
25459   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25460   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25461   ins_encode %{
25462     int vlen_enc = vector_length_encoding(this);
25463     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25464     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25465                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25466   %}
25467   ins_pipe(pipe_slow);
25468 %}
25469 
25470 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25471 %{
25472   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25473             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25474             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25475   match(Set dst (SaturatingAddV src1 src2));
25476   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25477   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25478   ins_encode %{
25479     int vlen_enc = vector_length_encoding(this);
25480     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25481     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25482                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25483   %}
25484   ins_pipe(pipe_slow);
25485 %}
25486 
25487 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25488 %{
25489   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25490             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25491             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25492   match(Set dst (SaturatingSubV src1 src2));
25493   effect(TEMP ktmp);
25494   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25495   ins_encode %{
25496     int vlen_enc = vector_length_encoding(this);
25497     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25498     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25499                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25500   %}
25501   ins_pipe(pipe_slow);
25502 %}
25503 
25504 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25505 %{
25506   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25507             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25508             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25509   match(Set dst (SaturatingSubV src1 src2));
25510   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25511   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25512   ins_encode %{
25513     int vlen_enc = vector_length_encoding(this);
25514     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25515     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25516                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25517   %}
25518   ins_pipe(pipe_slow);
25519 %}
25520 
25521 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25522 %{
25523   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25524             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25525   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25526   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25527   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25528   ins_encode %{
25529     int vlen_enc = vector_length_encoding(this);
25530     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25531     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25532                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25533   %}
25534   ins_pipe(pipe_slow);
25535 %}
25536 
25537 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25538 %{
25539   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25540             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25541   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25542   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25543   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25544   ins_encode %{
25545     int vlen_enc = vector_length_encoding(this);
25546     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25547     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25548                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25549   %}
25550   ins_pipe(pipe_slow);
25551 %}
25552 
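// AVX-512 masked (predicated) saturating add/sub for subword types: the k-register
// mask selects which lanes receive the saturating result.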
25553 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25554   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25555             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25556   match(Set dst (SaturatingAddV (Binary dst src) mask));
25557   match(Set dst (SaturatingSubV (Binary dst src) mask));
25558   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25559   ins_encode %{
25560     int vlen_enc = vector_length_encoding(this);
25561     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25562     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25563                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25564   %}
25565   ins_pipe( pipe_slow );
25566 %}
25567 
25568 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25569   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25570             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25571   match(Set dst (SaturatingAddV (Binary dst src) mask));
25572   match(Set dst (SaturatingSubV (Binary dst src) mask));
25573   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25574   ins_encode %{
25575     int vlen_enc = vector_length_encoding(this);
25576     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25577     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25578                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25579   %}
25580   ins_pipe( pipe_slow );
25581 %}
25582 
25583 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25584   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25585             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25586   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25587   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25588   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25589   ins_encode %{
25590     int vlen_enc = vector_length_encoding(this);
25591     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25592     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25593                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25594   %}
25595   ins_pipe( pipe_slow );
25596 %}
25597 
25598 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25599   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25600             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25601   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25602   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25603   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25604   ins_encode %{
25605     int vlen_enc = vector_length_encoding(this);
25606     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25607     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25608                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25609   %}
25610   ins_pipe( pipe_slow );
25611 %}
25612 
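// Two-table permute: each result lane is selected from the concatenation of src1 and
// src2 according to the per-lane indices held in the index vector.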
25613 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25614 %{
25615   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25616   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25617   ins_encode %{
25618     int vlen_enc = vector_length_encoding(this);
25619     BasicType bt = Matcher::vector_element_basic_type(this);
25620     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25621   %}
25622   ins_pipe(pipe_slow);
25623 %}
25624 
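// Scalar Float16 (half-precision) operations. The reinterpret rules below move the
// 16-bit value between a general purpose register and an XMM register.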
25625 instruct reinterpretS2HF(regF dst, rRegI src)
25626 %{
25627   match(Set dst (ReinterpretS2HF src));
25628   format %{ "evmovw $dst, $src" %}
25629   ins_encode %{
25630     __ evmovw($dst$$XMMRegister, $src$$Register);
25631   %}
25632   ins_pipe(pipe_slow);
25633 %}
25634 
25635 instruct reinterpretHF2S(rRegI dst, regF src)
25636 %{
25637   match(Set dst (ReinterpretHF2S src));
25638   format %{ "evmovw $dst, $src" %}
25639   ins_encode %{
25640     __ evmovw($dst$$Register, $src$$XMMRegister);
25641     __ narrow_subword_type($dst$$Register, T_SHORT);
25642   %}
25643   ins_pipe(pipe_slow);
25644 %}
25645 
25646 instruct convF2HFAndS2HF(regF dst, regF src)
25647 %{
25648   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25649   format %{ "convF2HFAndS2HF $dst, $src" %}
25650   ins_encode %{
25651     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25652   %}
25653   ins_pipe(pipe_slow);
25654 %}
25655 
25656 instruct convHF2SAndHF2F(regF dst, regF src)
25657 %{
25658   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25659   format %{ "convHF2SAndHF2F $dst, $src" %}
25660   ins_encode %{
25661     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25662   %}
25663   ins_pipe(pipe_slow);
25664 %}
25665 
25666 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25667 %{
25668   match(Set dst (SqrtHF src));
25669   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25670   ins_encode %{
25671     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25672   %}
25673   ins_pipe(pipe_slow);
25674 %}
25675 
25676 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25677 %{
25678   match(Set dst (AddHF src1 src2));
25679   match(Set dst (DivHF src1 src2));
25680   match(Set dst (MulHF src1 src2));
25681   match(Set dst (SubHF src1 src2));
25682   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25683   ins_encode %{
25684     int opcode = this->ideal_Opcode();
25685     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25686   %}
25687   ins_pipe(pipe_slow);
25688 %}
25689 
25690 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25691 %{
25692   predicate(VM_Version::supports_avx10_2());
25693   match(Set dst (MaxHF src1 src2));
25694   match(Set dst (MinHF src1 src2));
25695 
25696   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25697   ins_encode %{
25698     int opcode = this->ideal_Opcode();
25699     __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25700   %}
25701   ins_pipe( pipe_slow );
25702 %}
25703 
25704 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25705 %{
25706   predicate(!VM_Version::supports_avx10_2());
25707   match(Set dst (MaxHF src1 src2));
25708   match(Set dst (MinHF src1 src2));
25709   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25710 
25711   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25712   ins_encode %{
25713     int opcode = this->ideal_Opcode();
25714     __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25715                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25716   %}
25717   ins_pipe( pipe_slow );
25718 %}
25719 
25720 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25721 %{
25722   match(Set dst (FmaHF  src2 (Binary dst src1)));
25723   effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma scalarH" %}
25725   ins_encode %{
25726     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25727   %}
25728   ins_pipe( pipe_slow );
25729 %}
25730 
25731 
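// Packed Float16 (half-precision) vector operations.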
25732 instruct vector_sqrt_HF_reg(vec dst, vec src)
25733 %{
25734   match(Set dst (SqrtVHF src));
25735   format %{ "vector_sqrt_fp16 $dst, $src" %}
25736   ins_encode %{
25737     int vlen_enc = vector_length_encoding(this);
25738     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25739   %}
25740   ins_pipe(pipe_slow);
25741 %}
25742 
25743 instruct vector_sqrt_HF_mem(vec dst, memory src)
25744 %{
25745   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25746   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25747   ins_encode %{
25748     int vlen_enc = vector_length_encoding(this);
25749     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25750   %}
25751   ins_pipe(pipe_slow);
25752 %}
25753 
25754 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25755 %{
25756   match(Set dst (AddVHF src1 src2));
25757   match(Set dst (DivVHF src1 src2));
25758   match(Set dst (MulVHF src1 src2));
25759   match(Set dst (SubVHF src1 src2));
25760   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25761   ins_encode %{
25762     int vlen_enc = vector_length_encoding(this);
25763     int opcode = this->ideal_Opcode();
25764     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25765   %}
25766   ins_pipe(pipe_slow);
25767 %}
25768 
25769 
25770 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25771 %{
25772   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25773   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25774   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25775   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25776   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25777   ins_encode %{
25778     int vlen_enc = vector_length_encoding(this);
25779     int opcode = this->ideal_Opcode();
25780     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25781   %}
25782   ins_pipe(pipe_slow);
25783 %}
25784 
25785 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25786 %{
25787   match(Set dst (FmaVHF src2 (Binary dst src1)));
25788   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25789   ins_encode %{
25790     int vlen_enc = vector_length_encoding(this);
25791     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25792   %}
25793   ins_pipe( pipe_slow );
25794 %}
25795 
25796 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25797 %{
25798   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25799   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25800   ins_encode %{
25801     int vlen_enc = vector_length_encoding(this);
25802     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25803   %}
25804   ins_pipe( pipe_slow );
25805 %}
25806 
25807 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25808 %{
25809   predicate(VM_Version::supports_avx10_2());
25810   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25811   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25812   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25813   ins_encode %{
25814     int vlen_enc = vector_length_encoding(this);
25815     int opcode = this->ideal_Opcode();
25816     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25817                             k0, vlen_enc);
25818   %}
25819   ins_pipe( pipe_slow );
25820 %}
25821 
25822 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25823 %{
25824   predicate(VM_Version::supports_avx10_2());
25825   match(Set dst (MinVHF src1 src2));
25826   match(Set dst (MaxVHF src1 src2));
25827   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25828   ins_encode %{
25829     int vlen_enc = vector_length_encoding(this);
25830     int opcode = this->ideal_Opcode();
25831     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25832                             k0, vlen_enc);
25833   %}
25834   ins_pipe( pipe_slow );
25835 %}
25836 
25837 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25838 %{
25839   predicate(!VM_Version::supports_avx10_2());
25840   match(Set dst (MinVHF src1 src2));
25841   match(Set dst (MaxVHF src1 src2));
25842   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25843   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25844   ins_encode %{
25845     int vlen_enc = vector_length_encoding(this);
25846     int opcode = this->ideal_Opcode();
25847     __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25848                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25849   %}
25850   ins_pipe( pipe_slow );
25851 %}
25852 
25853 //----------PEEPHOLE RULES-----------------------------------------------------
25854 // These must follow all instruction definitions as they use the names
25855 // defined in the instructions definitions.
25856 //
25857 // peeppredicate ( rule_predicate );
// // the rule is ignored unless this predicate evaluates to true
25859 //
25860 // peepmatch ( root_instr_name [preceding_instruction]* );
25861 //
25862 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the procedure
// // should reside in the architecture-dependent peephole file and have the
// // signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, functions that when invoked return a new node
// // as defined in peepreplace, and the rules of the nodes appearing in the
// // corresponding peepmatch. The procedure returns true if successful, else
// // it returns false.
25871 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
// // instruction numbers are zero-based using left-to-right order in peepmatch
25876 //
25877 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25878 // // provide an instruction_number.operand_name for each operand that appears
25879 // // in the replacement instruction's match rule
25880 //
25881 // ---------VM FLAGS---------------------------------------------------------
25882 //
25883 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25884 //
25885 // Each peephole rule is given an identifying number starting with zero and
25886 // increasing by one in the order seen by the parser.  An individual peephole
25887 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25888 // on the command-line.
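//
// For example (illustrative; OptoPeephole and OptoPeepholeAt are develop-style
// flags, so they are typically only settable in debug builds):
//   java -XX:-OptoPeephole ...        run with all peephole rules disabled
//   java -XX:OptoPeepholeAt=3 ...     run with only peephole rule number 3 enabled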
25889 //
25890 // ---------CURRENT LIMITATIONS----------------------------------------------
25891 //
25892 // Only transformations inside a basic block (do we need more for peephole)
25893 //
25894 // ---------EXAMPLE----------------------------------------------------------
25895 //
25896 // // pertinent parts of existing instructions in architecture description
25897 // instruct movI(rRegI dst, rRegI src)
25898 // %{
25899 //   match(Set dst (CopyI src));
25900 // %}
25901 //
25902 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25903 // %{
25904 //   match(Set dst (AddI dst src));
25905 //   effect(KILL cr);
25906 // %}
25907 //
25908 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25909 // %{
25910 //   match(Set dst (AddI dst src));
25911 // %}
25912 //
25913 // 1. Simple replacement
25914 // - Only match adjacent instructions in same basic block
25915 // - Only equality constraints
25916 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25917 // - Only one replacement instruction
25918 //
25919 // // Change (inc mov) to lea
25920 // peephole %{
25921 //   // lea should only be emitted when beneficial
25922 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25923 //   // increment preceded by register-register move
25924 //   peepmatch ( incI_rReg movI );
25925 //   // require that the destination register of the increment
25926 //   // match the destination register of the move
25927 //   peepconstraint ( 0.dst == 1.dst );
25928 //   // construct a replacement instruction that sets
25929 //   // the destination to ( move's source register + one )
25930 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25931 // %}
25932 //
25933 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25935 // - More flexible constraints
25936 // - More flexible transformations
25937 // - May utilise architecture-dependent API more effectively
25938 // - Currently only one replacement instruction due to adlc parsing capabilities
25939 //
25940 // // Change (inc mov) to lea
25941 // peephole %{
25942 //   // lea should only be emitted when beneficial
25943 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the procedure below
25945 //   peepmatch ( incI_rReg movI );
25946 //   // the method that takes the responsibility of transformation
25947 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a function that creates this node
//   // when invoked is passed into the procedure above
25950 //   peepreplace ( leaI_rReg_immI() );
25951 // %}
25952 
// These instructions are not matched by the matcher; they are only used by the peephole rules below
25954 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25955 %{
25956   predicate(false);
25957   match(Set dst (AddI src1 src2));
25958   format %{ "leal    $dst, [$src1 + $src2]" %}
25959   ins_encode %{
25960     Register dst = $dst$$Register;
25961     Register src1 = $src1$$Register;
25962     Register src2 = $src2$$Register;
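    // rbp and r13 cannot be encoded as a base register without an extra
    // displacement byte, so prefer the other operand as the base when possible.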
25963     if (src1 != rbp && src1 != r13) {
25964       __ leal(dst, Address(src1, src2, Address::times_1));
25965     } else {
25966       assert(src2 != rbp && src2 != r13, "");
25967       __ leal(dst, Address(src2, src1, Address::times_1));
25968     }
25969   %}
25970   ins_pipe(ialu_reg_reg);
25971 %}
25972 
25973 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25974 %{
25975   predicate(false);
25976   match(Set dst (AddI src1 src2));
25977   format %{ "leal    $dst, [$src1 + $src2]" %}
25978   ins_encode %{
25979     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25980   %}
25981   ins_pipe(ialu_reg_reg);
25982 %}
25983 
25984 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25985 %{
25986   predicate(false);
25987   match(Set dst (LShiftI src shift));
25988   format %{ "leal    $dst, [$src << $shift]" %}
25989   ins_encode %{
25990     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25991     Register src = $src$$Register;
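    // A shift by 1 (scale == times_2) is emitted as base + index (src + src), which
    // avoids the 32-bit displacement required when a scaled index is used with no
    // base register; rbp and r13 are excluded since they cannot serve as a base
    // without an extra displacement byte.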
25992     if (scale == Address::times_2 && src != rbp && src != r13) {
25993       __ leal($dst$$Register, Address(src, src, Address::times_1));
25994     } else {
25995       __ leal($dst$$Register, Address(noreg, src, scale));
25996     }
25997   %}
25998   ins_pipe(ialu_reg_reg);
25999 %}
26000 
26001 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
26002 %{
26003   predicate(false);
26004   match(Set dst (AddL src1 src2));
26005   format %{ "leaq    $dst, [$src1 + $src2]" %}
26006   ins_encode %{
26007     Register dst = $dst$$Register;
26008     Register src1 = $src1$$Register;
26009     Register src2 = $src2$$Register;
26010     if (src1 != rbp && src1 != r13) {
26011       __ leaq(dst, Address(src1, src2, Address::times_1));
26012     } else {
26013       assert(src2 != rbp && src2 != r13, "");
26014       __ leaq(dst, Address(src2, src1, Address::times_1));
26015     }
26016   %}
26017   ins_pipe(ialu_reg_reg);
26018 %}
26019 
26020 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
26021 %{
26022   predicate(false);
26023   match(Set dst (AddL src1 src2));
26024   format %{ "leaq    $dst, [$src1 + $src2]" %}
26025   ins_encode %{
26026     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
26027   %}
26028   ins_pipe(ialu_reg_reg);
26029 %}
26030 
26031 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
26032 %{
26033   predicate(false);
26034   match(Set dst (LShiftL src shift));
26035   format %{ "leaq    $dst, [$src << $shift]" %}
26036   ins_encode %{
26037     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
26038     Register src = $src$$Register;
26039     if (scale == Address::times_2 && src != rbp && src != r13) {
26040       __ leaq($dst$$Register, Address(src, src, Address::times_1));
26041     } else {
26042       __ leaq($dst$$Register, Address(noreg, src, scale));
26043     }
26044   %}
26045   ins_pipe(ialu_reg_reg);
26046 %}
26047 
26048 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
26049 // sal}) with lea instructions. The {add, sal} rules are beneficial in
26050 // processors with at least partial ALU support for lea
26051 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
26052 // beneficial for processors with full ALU support
26053 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
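//
// For example (illustrative; register names are arbitrary), the first rule below
// rewrites the pair
//   movl  rdx, rsi
//   addl  rdx, rcx
// into the single three-operand form
//   leal  rdx, [rsi + rcx]
// which folds the move and the add into one instruction.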
26054 
26055 peephole
26056 %{
26057   peeppredicate(VM_Version::supports_fast_2op_lea());
26058   peepmatch (addI_rReg);
26059   peepprocedure (lea_coalesce_reg);
26060   peepreplace (leaI_rReg_rReg_peep());
26061 %}
26062 
26063 peephole
26064 %{
26065   peeppredicate(VM_Version::supports_fast_2op_lea());
26066   peepmatch (addI_rReg_imm);
26067   peepprocedure (lea_coalesce_imm);
26068   peepreplace (leaI_rReg_immI_peep());
26069 %}
26070 
26071 peephole
26072 %{
26073   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26074                 VM_Version::is_intel_cascade_lake());
26075   peepmatch (incI_rReg);
26076   peepprocedure (lea_coalesce_imm);
26077   peepreplace (leaI_rReg_immI_peep());
26078 %}
26079 
26080 peephole
26081 %{
26082   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26083                 VM_Version::is_intel_cascade_lake());
26084   peepmatch (decI_rReg);
26085   peepprocedure (lea_coalesce_imm);
26086   peepreplace (leaI_rReg_immI_peep());
26087 %}
26088 
26089 peephole
26090 %{
26091   peeppredicate(VM_Version::supports_fast_2op_lea());
26092   peepmatch (salI_rReg_immI2);
26093   peepprocedure (lea_coalesce_imm);
26094   peepreplace (leaI_rReg_immI2_peep());
26095 %}
26096 
26097 peephole
26098 %{
26099   peeppredicate(VM_Version::supports_fast_2op_lea());
26100   peepmatch (addL_rReg);
26101   peepprocedure (lea_coalesce_reg);
26102   peepreplace (leaL_rReg_rReg_peep());
26103 %}
26104 
26105 peephole
26106 %{
26107   peeppredicate(VM_Version::supports_fast_2op_lea());
26108   peepmatch (addL_rReg_imm);
26109   peepprocedure (lea_coalesce_imm);
26110   peepreplace (leaL_rReg_immL32_peep());
26111 %}
26112 
26113 peephole
26114 %{
26115   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26116                 VM_Version::is_intel_cascade_lake());
26117   peepmatch (incL_rReg);
26118   peepprocedure (lea_coalesce_imm);
26119   peepreplace (leaL_rReg_immL32_peep());
26120 %}
26121 
26122 peephole
26123 %{
26124   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26125                 VM_Version::is_intel_cascade_lake());
26126   peepmatch (decL_rReg);
26127   peepprocedure (lea_coalesce_imm);
26128   peepreplace (leaL_rReg_immL32_peep());
26129 %}
26130 
26131 peephole
26132 %{
26133   peeppredicate(VM_Version::supports_fast_2op_lea());
26134   peepmatch (salL_rReg_immI2);
26135   peepprocedure (lea_coalesce_imm);
26136   peepreplace (leaL_rReg_immI2_peep());
26137 %}
26138 
26139 peephole
26140 %{
26141   peepmatch (leaPCompressedOopOffset);
26142   peepprocedure (lea_remove_redundant);
26143 %}
26144 
26145 peephole
26146 %{
26147   peepmatch (leaP8Narrow);
26148   peepprocedure (lea_remove_redundant);
26149 %}
26150 
26151 peephole
26152 %{
26153   peepmatch (leaP32Narrow);
26154   peepprocedure (lea_remove_redundant);
26155 %}
26156 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the previous instruction.
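//
// For example (illustrative), in a sequence such as
//   andl  rax, rcx
//   testl rax, rax
//   je    done
// the andl already sets ZF and SF from its result, so the testl can be removed as
// long as nothing between the two instructions clobbers the flags.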
26159 
26160 //int variant
26161 peephole
26162 %{
26163   peepmatch (testI_reg);
26164   peepprocedure (test_may_remove);
26165 %}
26166 
26167 //long variant
26168 peephole
26169 %{
26170   peepmatch (testL_reg);
26171   peepprocedure (test_may_remove);
26172 %}
26173 
26174 
26175 //----------SMARTSPILL RULES---------------------------------------------------
26176 // These must follow all instruction definitions as they use the names
26177 // defined in the instructions definitions.