1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
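   66 // Previously, RBX, RSI, and RDI were set as save-on-entry (SOE) for Java code.
   67 // SOE was then turned off in Java code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   // NOTE(review): the reg_defs below mark RSI and RDI as SOC in the first
   // (register save type) column; SOE appears only in the C convention column
   // and only under _WIN64.  Confirm whether the statement on the previous
   // line is stale.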
   69 
   // RAX, RCX, RDX and RBX (encodings 0-3).
   // Every general register is described by a pair of reg_defs: NAME is bound
   // to the register's VMReg and NAME_H to the slot that follows it
   // (as_VMReg()->next()).  Both entries of a pair share the same save types,
   // ideal type (Op_RegI) and encoding.
   // RAX, RCX and RDX are save-on-call in both columns.
   70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
   71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
   72 
   73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
   74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
   75 
   76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
   77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
   78 
   // RBX: save-on-call in the register save type column, save-on-entry in the
   // C convention column.
   79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
   80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
   81 
   // RSP (encoding 4): no-save (NS) in both the register save type and the
   // C convention columns.
   82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
   83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
   84 
   // RBP (encoding 5): no-save in the register save type column, save-on-entry
   // in the C convention column.
   85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
   86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
   87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
   // RSI (encoding 6) and RDI (encoding 7).
   // The two branches differ only in the C convention column: save-on-entry
   // when _WIN64 is defined, save-on-call otherwise.  The register save type
   // column is save-on-call in both branches.
   89 #ifdef _WIN64
   90 
   91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
   92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
   93 
   94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
   95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
   96 
   97 #else
   98 
   99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
  100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
  101 
  102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
  103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
  104 
  105 #endif
  106 
  // R8-R11 (encodings 8-11): save-on-call in both the register save type and
  // the C convention columns.  Each register is a NAME / NAME_H pair, with
  // NAME_H bound to the VMReg slot after NAME.
  107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
  108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
  109 
  110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
  111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
  112 
  113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
  114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  115 
  116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
  117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  118 
  // R12-R15 (encodings 12-15): save-on-call in the register save type column,
  // save-on-entry in the C convention column.  Each register is a NAME /
  // NAME_H pair, with NAME_H bound to the VMReg slot after NAME.
  119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
  120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
  121 
  122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
  123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
  124 
  125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
  126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
  127 
  128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
  129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  // R16-R31 (encodings 16-31): save-on-call in both the register save type
  // and the C convention columns.  Each register is a NAME / NAME_H pair,
  // with NAME_H bound to the VMReg slot after NAME.
  // NOTE(review): presumably these are the Intel APX extended general
  // registers; confirm their availability checks against the VM feature code.
  131 reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
  132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
  133 
  134 reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
  135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
  136 
  137 reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
  138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
  139 
  140 reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
  141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
  142 
  143 reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
  144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
  145 
  146 reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
  147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
  148 
  149 reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
  150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
  151 
  152 reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
  153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
  154 
  155 reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
  156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
  157 
  158 reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
  159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
  160 
  161 reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
  162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
  163 
  164 reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
  165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
  166 
  167 reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
  168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
  169 
  170 reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
  171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
  172 
  173 reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
  174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
  175 
  176 reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
  177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // Floating Point Registers
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
  188 
  // chunk0: allocation order for the general registers, highest priority
  // first.  Each of the 32 general registers defined above appears exactly
  // once, always as an adjacent (NAME, NAME_H) pair so that pairs stay on an
  // even boundary.  The list opens with R10, R11, R8 and R9 and closes with
  // RSP; do not reorder entries without reviewing the selection heuristic
  // described in the comment preceding this definition.
  189 alloc_class chunk0(R10,         R10_H,
  190                    R11,         R11_H,
  191                    R8,          R8_H,
  192                    R9,          R9_H,
  193                    R12,         R12_H,
  194                    RCX,         RCX_H,
  195                    RBX,         RBX_H,
  196                    RDI,         RDI_H,
  197                    RDX,         RDX_H,
  198                    RSI,         RSI_H,
  199                    RAX,         RAX_H,
  200                    RBP,         RBP_H,
  201                    R13,         R13_H,
  202                    R14,         R14_H,
  203                    R15,         R15_H,
  204                    R16,         R16_H,
  205                    R17,         R17_H,
  206                    R18,         R18_H,
  207                    R19,         R19_H,
  208                    R20,         R20_H,
  209                    R21,         R21_H,
  210                    R22,         R22_H,
  211                    R23,         R23_H,
  212                    R24,         R24_H,
  213                    R25,         R25_H,
  214                    R26,         R26_H,
  215                    R27,         R27_H,
  216                    R28,         R28_H,
  217                    R29,         R29_H,
  218                    R30,         R30_H,
  219                    R31,         R31_H,
  220                    RSP,         RSP_H);
  221 
  222 // XMM registers.  512-bit registers or 16 32-bit words each, labeled (a)-p.
  223 // Word a in each register holds a Float, words ab hold a Double.
  224 // The whole registers are used in SSE4.2 version intrinsics,
  225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  226 // UseXMMForArrayCopy and UseSuperword flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
  236 
  // XMM0 (encoding 0): sixteen Op_RegF slots, XMM0 plus XMM0b..XMM0p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.  The ABI notes above list it as a possible parameter register
  // on both Linux and Windows.
  237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  253 
  // XMM1 (encoding 1): sixteen Op_RegF slots, XMM1 plus XMM1b..XMM1p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.  The ABI notes above list it as a possible parameter register
  // on both Linux and Windows.
  254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
  260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
  261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
  262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
  263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
  264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
  265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
  266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
  267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
  268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
  269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
  270 
  // XMM2 (encoding 2): sixteen Op_RegF slots, XMM2 plus XMM2b..XMM2p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.  The ABI notes above list it as a possible parameter register
  // on both Linux and Windows.
  271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
  272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
  273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
  274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
  275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
  276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
  277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
  278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
  279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
  280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
  281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
  282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
  283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
  284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
  285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
  286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
  287 
  // XMM3 (encoding 3): sixteen Op_RegF slots, XMM3 plus XMM3b..XMM3p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.  The ABI notes above list it as a possible parameter register
  // on both Linux and Windows.
  288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
  289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
  290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
  291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
  292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
  293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
  294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
  295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
  296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
  297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
  298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
  299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
  300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
  301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
  302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
  303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
  304 
  // XMM4 (encoding 4): sixteen Op_RegF slots, XMM4 plus XMM4b..XMM4p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.  The ABI notes above list it as a possible parameter register
  // on Linux only.
  305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
  306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
  307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
  308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
  309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
  310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
  311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
  312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
  313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
  314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
  315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
  316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
  317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
  318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
  319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
  320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
  321 
  // XMM5 (encoding 5): sixteen Op_RegF slots, XMM5 plus XMM5b..XMM5p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.  The ABI notes above list it as a possible parameter register
  // on Linux only.
  322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
  323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
  324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
  325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
  326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
  327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
  328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
  329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
  330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
  331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
  332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
  333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
  334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
  335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
  336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
  337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
  338 
  // XMM6 (encoding 6): sixteen Op_RegF slots, XMM6 plus XMM6b..XMM6p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.
  // NOTE(review): the ABI notes above list XMM6-XMM15 as preserved across
  // calls on Windows, yet the C convention column here is SOC -- presumably
  // the preservation is handled elsewhere; confirm.
  339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
  340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
  341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
  342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
  343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
  344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
  345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
  346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
  347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
  348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
  349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
  350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
  351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
  352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
  353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
  354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
  355 
  // XMM7 (encoding 7): sixteen Op_RegF slots, XMM7 plus XMM7b..XMM7p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.  Last of the registers the ABI notes above list as possible
  // Linux parameter registers.
  // NOTE(review): the same notes list XMM6-XMM15 as preserved across calls on
  // Windows, yet the C convention column here is SOC -- confirm.
  356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
  357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
  358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
  359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
  360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
  361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
  362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
  363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
  364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
  365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
  366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
  367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
  368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
  369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
  370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
  371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
  // XMM8 (encoding 8): sixteen Op_RegF slots, XMM8 plus XMM8b..XMM8p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.  First of the registers that, per the encoding notes above,
  // need a REX (or VEX/EVEX) prefix.
  373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
  374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
  375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
  376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
  377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
  378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
  379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
  380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
  381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
  382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
  383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
  384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
  385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
  386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
  387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
  388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
  389 
  // XMM9 (encoding 9): sixteen Op_RegF slots, XMM9 plus XMM9b..XMM9p, bound
  // to consecutive VMReg slots via next(1)..next(15).  Save-on-call in both
  // columns.  Per the encoding notes above, needs a REX (or VEX/EVEX) prefix.
  390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
  391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
  392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
  393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
  394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
  395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
  396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
  397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
  398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
  399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
  400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
  401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
  402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
  403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
  404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
  405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
  406 
  // XMM10 (encoding 10): sixteen Op_RegF slots, XMM10 plus XMM10b..XMM10p,
  // bound to consecutive VMReg slots via next(1)..next(15).  Save-on-call in
  // both columns.  Per the encoding notes above, needs a REX (or VEX/EVEX)
  // prefix.
  407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
  408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
  409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
  410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
  411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
  412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
  413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
  414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
  415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
  416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
  417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
  418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
  419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
  420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
  421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
  422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
  423 
  // XMM11 (encoding 11): sixteen Op_RegF slots, XMM11 plus XMM11b..XMM11p,
  // bound to consecutive VMReg slots via next(1)..next(15).  Save-on-call in
  // both columns.  Per the encoding notes above, needs a REX (or VEX/EVEX)
  // prefix.
  424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
  425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
  426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
  427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
  428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
  429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
  430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
  431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
  432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
  433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
  434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
  435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
  436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
  437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
  438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
  439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
  440 
  // XMM12 (encoding 12): sixteen Op_RegF slots, XMM12 plus XMM12b..XMM12p,
  // bound to consecutive VMReg slots via next(1)..next(15).  Save-on-call in
  // both columns.  Per the encoding notes above, needs a REX (or VEX/EVEX)
  // prefix.
  441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
  442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
  443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
  444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
  445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
  446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
  447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
  448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
  449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
  450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
  451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
  452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
  453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
  454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
  455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
  456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
  457 
  // XMM13 (encoding 13): sixteen Op_RegF slots, XMM13 plus XMM13b..XMM13p,
  // bound to consecutive VMReg slots via next(1)..next(15).  Save-on-call in
  // both columns.  Per the encoding notes above, needs a REX (or VEX/EVEX)
  // prefix.
  458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
  459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
  460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
  461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
  462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
  463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
  464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
  465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
  466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
  467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
  468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
  469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
  470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
  471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
  472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
  473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
  474 
  // XMM14 (encoding 14): sixteen Op_RegF slots, XMM14 plus XMM14b..XMM14p,
  // bound to consecutive VMReg slots via next(1)..next(15).  Save-on-call in
  // both columns.  Per the encoding notes above, needs a REX (or VEX/EVEX)
  // prefix.
  475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
  476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
  477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
  478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
  479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
  480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
  481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
  482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
  483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
  484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
  485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
  486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
  487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
  488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
  489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
  490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
  491 
  // XMM15 (encoding 15): sixteen Op_RegF slots, XMM15 plus XMM15b..XMM15p,
  // bound to consecutive VMReg slots via next(1)..next(15).  Save-on-call in
  // both columns.  Last register of the XMM8-XMM15 range that the encoding
  // notes above describe for pre-EVEX architectures.
  // NOTE(review): the ABI notes above list XMM6-XMM15 as preserved across
  // calls on Windows, yet the C convention column here is SOC -- confirm.
  492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
  493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
  494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
  495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
  496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
  497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
  498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
  499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
  500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
  501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
  502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
  503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
  504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
  505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
  506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
  507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
  // XMM16 (encoding 16): sixteen Op_RegF slots, XMM16 plus XMM16b..XMM16p,
  // bound to consecutive VMReg slots via next(1)..next(15).  Save-on-call in
  // both columns.
  // NOTE(review): the encoding notes above mention registers beyond XMM15
  // only for EVEX-enabled architectures -- presumably XMM16 and up are usable
  // only there; confirm against the register class definitions.
  509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
  510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
  511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
  512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
  513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
  514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
  515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
  516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
  517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
  518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
  519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
  520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
  521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
  522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
  523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
  524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
  525 
  // XMM17 (encoding 17): sixteen Op_RegF slots, XMM17 plus XMM17b..XMM17p,
  // bound to consecutive VMReg slots via next(1)..next(15).  Save-on-call in
  // both columns.
  // NOTE(review): presumably usable only on EVEX-enabled architectures, as
  // the encoding notes above mention registers beyond XMM15 only in that
  // case; confirm.
  526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
  527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
  528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
  529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
  530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
  531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
  532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
  533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
  534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
  535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
  536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
  537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
  538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
  539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
  540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
  541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
  542 
  543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); // XMM18: base VMReg slot; XMM18b-XMM18p are slots next(1)-next(15), all with encoding 18
  544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
  545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
  546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
  547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
  548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
  549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
  550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
  551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
  552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
  553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
  554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
  555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
  556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
  557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
  558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
  559 
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); // XMM19: base VMReg slot; XMM19b-XMM19p are slots next(1)-next(15), all with encoding 19
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
  576 
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); // XMM20: base VMReg slot; XMM20b-XMM20p are slots next(1)-next(15), all with encoding 20
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
  593 
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); // XMM21: base VMReg slot; XMM21b-XMM21p are slots next(1)-next(15), all with encoding 21
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
  610 
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); // XMM22: base VMReg slot; XMM22b-XMM22p are slots next(1)-next(15), all with encoding 22
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
  627 
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); // XMM23: base VMReg slot; XMM23b-XMM23p are slots next(1)-next(15), all with encoding 23
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
  644 
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); // XMM24: base VMReg slot; XMM24b-XMM24p are slots next(1)-next(15), all with encoding 24
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
  661 
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); // XMM25: base VMReg slot; XMM25b-XMM25p are slots next(1)-next(15), all with encoding 25
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
  678 
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); // XMM26: base VMReg slot; XMM26b-XMM26p are slots next(1)-next(15), all with encoding 26
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
  695 
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); // XMM27: base VMReg slot; XMM27b-XMM27p are slots next(1)-next(15), all with encoding 27
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
  712 
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); // XMM28: base VMReg slot; XMM28b-XMM28p are slots next(1)-next(15), all with encoding 28
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
  729 
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); // XMM29: base VMReg slot; XMM29b-XMM29p are slots next(1)-next(15), all with encoding 29
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
  746 
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); // XMM30: base VMReg slot; XMM30b-XMM30p are slots next(1)-next(15), all with encoding 30
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
  763 
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); // XMM31: base VMReg slot; XMM31b-XMM31p are slots next(1)-next(15), all with encoding 31
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
  780 
  781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); // flags register: ideal register type 0, encoding 16, and VMRegImpl::Bad() in place of a real VMReg
  782 
  783 // AVX3 Mask Registers K1-K7 (no K0 is defined in this list). Each Kn is paired with Kn_H at kn->as_VMReg()->next(); both halves carry encoding n.
  784 reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
  785 reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());
  786 
  787 reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
  788 reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());
  789 
  790 reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
  791 reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());
  792 
  793 reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
  794 reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());
  795 
  796 reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
  797 reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());
  798 
  799 reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
  800 reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());
  801 
  802 reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
  803 reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
  812 
  813 // Empty register class: declared with no member registers.
  814 reg_class no_reg();
  815 
  816 // Class for all pointer/long registers including APX extended GPRs (R16-R31). Each register is listed with its _H half; RSP and R15 are members.
  817 reg_class all_reg(RAX, RAX_H,
  818                   RDX, RDX_H,
  819                   RBP, RBP_H,
  820                   RDI, RDI_H,
  821                   RSI, RSI_H,
  822                   RCX, RCX_H,
  823                   RBX, RBX_H,
  824                   RSP, RSP_H,
  825                   R8,  R8_H,
  826                   R9,  R9_H,
  827                   R10, R10_H,
  828                   R11, R11_H,
  829                   R12, R12_H,
  830                   R13, R13_H,
  831                   R14, R14_H,
  832                   R15, R15_H,
  833                   R16, R16_H,
  834                   R17, R17_H,
  835                   R18, R18_H,
  836                   R19, R19_H,
  837                   R20, R20_H,
  838                   R21, R21_H,
  839                   R22, R22_H,
  840                   R23, R23_H,
  841                   R24, R24_H,
  842                   R25, R25_H,
  843                   R26, R26_H,
  844                   R27, R27_H,
  845                   R28, R28_H,
  846                   R29, R29_H,
  847                   R30, R30_H,
  848                   R31, R31_H);
  849 
  850 // Class for all int registers including APX extended GPRs (R16-R31). Only base names are listed (no _H halves); RSP and R15 are not members.
  851 reg_class all_int_reg(RAX,
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14,
  865                       R16,
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
  881 
  882 // Class for all pointer registers. The members are not listed inline; the body returns _ANY_REG_mask, which is defined outside this section.
  883 reg_class any_reg %{
  884   return _ANY_REG_mask;
  885 %}
  886 
  887 // Class for all pointer registers (excluding RSP); body returns _PTR_REG_mask
  888 reg_class ptr_reg %{
  889   return _PTR_REG_mask;
  890 %}
  891 
  892 // Class for all pointer registers (excluding RSP and RBP); body returns _PTR_REG_NO_RBP_mask
  893 reg_class ptr_reg_no_rbp %{
  894   return _PTR_REG_NO_RBP_mask;
  895 %}
  896 
  897 // Class for all pointer registers (excluding RAX and RSP); body returns _PTR_NO_RAX_REG_mask
  898 reg_class ptr_no_rax_reg %{
  899   return _PTR_NO_RAX_REG_mask;
  900 %}
  901 
  902 // Class for all pointer registers (excluding RAX, RBX, and RSP); body returns _PTR_NO_RAX_RBX_REG_mask
  903 reg_class ptr_no_rax_rbx_reg %{
  904   return _PTR_NO_RAX_RBX_REG_mask;
  905 %}
  906 
  907 // Class for all long registers (excluding RSP). The members are not listed inline; the body returns _LONG_REG_mask, defined outside this section.
  908 reg_class long_reg %{
  909   return _LONG_REG_mask;
  910 %}
  911 
  912 // Class for all long registers (excluding RAX, RDX and RSP); body returns _LONG_NO_RAX_RDX_REG_mask
  913 reg_class long_no_rax_rdx_reg %{
  914   return _LONG_NO_RAX_RDX_REG_mask;
  915 %}
  916 
  917 // Class for all long registers (excluding RCX and RSP); body returns _LONG_NO_RCX_REG_mask
  918 reg_class long_no_rcx_reg %{
  919   return _LONG_NO_RCX_REG_mask;
  920 %}
  921 
  922 // Class for all long registers (excluding RBP and R13). NOTE(review): the sibling long classes also exclude RSP - confirm against the definition of _LONG_NO_RBP_R13_REG_mask.
  923 reg_class long_no_rbp_r13_reg %{
  924   return _LONG_NO_RBP_R13_REG_mask;
  925 %}
  926 
  927 // Class for all int registers (excluding RSP). The members are not listed inline; the body returns _INT_REG_mask, defined outside this section.
  928 reg_class int_reg %{
  929   return _INT_REG_mask;
  930 %}
  931 
  932 // Class for all int registers (excluding RAX, RDX, and RSP); body returns _INT_NO_RAX_RDX_REG_mask
  933 reg_class int_no_rax_rdx_reg %{
  934   return _INT_NO_RAX_RDX_REG_mask;
  935 %}
  936 
  937 // Class for all int registers (excluding RCX and RSP); body returns _INT_NO_RCX_REG_mask
  938 reg_class int_no_rcx_reg %{
  939   return _INT_NO_RCX_REG_mask;
  940 %}
  941 
  942 // Class for all int registers (excluding RBP and R13). NOTE(review): the sibling int classes also exclude RSP - confirm against the definition of _INT_NO_RBP_R13_REG_mask.
  943 reg_class int_no_rbp_r13_reg %{
  944   return _INT_NO_RBP_R13_REG_mask;
  945 %}
  946 
  947 // Singleton class for RAX pointer register (RAX together with its RAX_H half)
  948 reg_class ptr_rax_reg(RAX, RAX_H);
  949 
  950 // Singleton class for RBX pointer register (RBX together with its RBX_H half)
  951 reg_class ptr_rbx_reg(RBX, RBX_H);
  952 
  953 // Singleton class for RSI pointer register (RSI together with its RSI_H half)
  954 reg_class ptr_rsi_reg(RSI, RSI_H);
  955 
  956 // Singleton class for RBP pointer register (RBP together with its RBP_H half)
  957 reg_class ptr_rbp_reg(RBP, RBP_H);
  958 
  959 // Singleton class for RDI pointer register (RDI together with its RDI_H half)
  960 reg_class ptr_rdi_reg(RDI, RDI_H);
  961 
  962 // Singleton class for stack pointer (RSP together with its RSP_H half)
  963 reg_class ptr_rsp_reg(RSP, RSP_H);
  964 
  965 // Singleton class for TLS pointer (R15 together with its R15_H half)
  966 reg_class ptr_r15_reg(R15, R15_H);
  967 
  968 // Singleton class for RAX long register (RAX together with its RAX_H half)
  969 reg_class long_rax_reg(RAX, RAX_H);
  970 
  971 // Singleton class for RCX long register (RCX together with its RCX_H half)
  972 reg_class long_rcx_reg(RCX, RCX_H);
  973 
  974 // Singleton class for RDX long register (RDX together with its RDX_H half)
  975 reg_class long_rdx_reg(RDX, RDX_H);
  976 
  977 // Singleton class for R11 long register (R11 together with its R11_H half)
  978 reg_class long_r11_reg(R11, R11_H);
  979 
  980 // Singleton class for RAX int register (base name only, no _H half)
  981 reg_class int_rax_reg(RAX);
  982 
  983 // Singleton class for RBX int register (base name only, no _H half)
  984 reg_class int_rbx_reg(RBX);
  985 
  986 // Singleton class for RCX int register (base name only, no _H half)
  987 reg_class int_rcx_reg(RCX);
  988 
  989 // Singleton class for RDX int register (base name only, no _H half)
  990 reg_class int_rdx_reg(RDX);
  991 
  992 // Singleton class for RDI int register (base name only, no _H half)
  993 reg_class int_rdi_reg(RDI);
  994 
  995 // Singleton class for instruction pointer
  996 // reg_class ip_reg(RIP);
  997 
  998 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
  999                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1000                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1001                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1002                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1003                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1004                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1005                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1006                    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1007                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1008                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1009                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1010                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1011                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1012                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1013                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1014                    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1015                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1016                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1017                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1018                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1019                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1020                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1021                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1022                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1023                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1024                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1025                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1026                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); // chunk1: all 16 slots (base, b-p) of XMM0-XMM31, one register per row in ascending order
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H); // chunk2: mask registers listed from K7 down to K1; presumably the order sets allocation preference - TODO confirm
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H); // vectmask_reg: all seven mask register pairs K1-K7, each with its _H half
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H); // vectmask_reg_K1..K7: one class per mask register, holding only that register's pair
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // The flags allocation class should be last; chunk3 holds only RFLAGS.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes (RFLAGS only)
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre-EVEX float registers: the base slot of XMM0-XMM15 only
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for EVEX float registers: the base slot of XMM0-XMM31 (the legacy list plus XMM16-XMM31)
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); // presumably float_reg_evex when the predicate is true, else float_reg_legacy - confirm against ADLC
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); // same pair of classes, but the predicate additionally requires supports_avx512vl()
 1115 
 1116 // Class for pre-EVEX double registers: the base and b slots of XMM0-XMM15 only
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
      // Lists XMM0 through XMM31, two names per register (base name and 'b' name).
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
      // double_reg (above) and double_reg_vl (below) pair the evex and legacy
      // double classes with a runtime predicate; presumably the first class is
      // used when the predicate holds, the second otherwise (confirm against adlc).
      // double_reg_vl additionally requires VM_Version::supports_avx512vl().
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
      // Lists XMM0 through XMM15, one name (the base name) per register.
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
      // Lists XMM0 through XMM31, one name (the base name) per register.
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
      // vectors_reg (above) and vectors_reg_vlbwdq (below) pair the evex and legacy
      // 32bit vector classes with a runtime predicate; presumably the first class
      // is used when the predicate holds, the second otherwise (confirm against adlc).
      // The _vlbwdq variant is keyed on VM_Version::supports_avx512vlbwdq() alone.
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
 1226 // Class for pre evex 64bit vector registers
      // Lists XMM0 through XMM15, two names per register (base name and 'b' name).
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
 1244 // Class for evex 64bit vector registers
      // Lists XMM0 through XMM31, two names per register (base name and 'b' name).
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
      // vectord_reg (above) and vectord_reg_vlbwdq (below) pair the evex and legacy
      // 64bit vector classes with a runtime predicate; presumably the first class
      // is used when the predicate holds, the second otherwise (confirm against adlc).
      // The _vlbwdq variant is keyed on VM_Version::supports_avx512vlbwdq() alone.
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
 1281 // Class for pre evex 128bit vector registers
      // Lists XMM0 through XMM15, four names per register (base name, 'b', 'c', 'd').
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
 1299 // Class for evex 128bit vector registers
      // Lists XMM0 through XMM31, four names per register (base name, 'b', 'c', 'd').
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
      // vectorx_reg (above) and vectorx_reg_vlbwdq (below) pair the evex and legacy
      // 128bit vector classes with a runtime predicate; presumably the first class
      // is used when the predicate holds, the second otherwise (confirm against adlc).
      // The _vlbwdq variant is keyed on VM_Version::supports_avx512vlbwdq() alone.
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
 1336 // Class for pre evex 256bit vector registers
      // Lists XMM0 through XMM15, eight names per register (base name, 'b' .. 'h').
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
 1354 // Class for evex 256bit vector registers
      // Lists XMM0 through XMM31, eight names per register (base name, 'b' .. 'h').
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
      // vectory_reg (above) and vectory_reg_vlbwdq (below) pair the evex and legacy
      // 256bit vector classes with a runtime predicate; presumably the first class
      // is used when the predicate holds, the second otherwise (confirm against adlc).
      // The _vlbwdq variant is keyed on VM_Version::supports_avx512vlbwdq() alone.
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for all 512bit vector registers
      // Lists XMM0 through XMM31, sixteen names per register (base name, 'b' .. 'p').
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers
      // "Restricted" here means XMM0 through XMM15 only; each register still
      // contributes sixteen names (base name, 'b' .. 'p').
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
      // vectorz_reg (above) and vectorz_reg_vl (below) pair the evex and legacy
      // 512bit vector classes with a runtime predicate; presumably the first class
      // is used when the predicate holds, the second otherwise (confirm against adlc).
      // vectorz_reg_vl additionally requires VM_Version::supports_avx512vl().
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
      // Class holding only XMM0, using four names (base name, 'b', 'c', 'd'), the
      // same per-register name count as the 128bit vector classes.
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
      // Declaration only; the definition lives in the `source` block that follows.
      // Expects a CastLL node (the definition asserts this) and reports whether
      // both bounds of its long type pass the imm32 test implemented there.
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
        // Returns true when each bound of the CastLL node's long type is acceptable:
        //   - lower bound _lo is min_jlong, or passes Assembler::is_simm32
        //   - upper bound _hi is max_jlong, or passes Assembler::is_simm32
        // A bound sitting at the extreme of the jlong range is always accepted.
        // (is_simm32 presumably tests "fits in a signed 32-bit immediate" - confirm.)
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
      // Declarations of the RegMask globals that are defined in the following
      // `source` block and filled in by reg_mask_init().
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
      // The _STACK_OR_* masks are the corresponding register mask or'ed with
      // STACK_OR_STACK_SLOTS_mask() (see reg_mask_init()).
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
      // Read-only accessors: each returns its _STACK_OR_* global by const reference.
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
      // RELOC_IMM64 (above) and RELOC_DISP32 (below) are short aliases for the
      // Assembler::imm_operand and Assembler::disp32_operand constants.
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
      // Shorthand used by the emit helpers in this file: `__ foo(...)` expands to
      // `masm->foo(...)`, so a variable named `masm` must be in scope at each use.
 1505 #define __ masm->
 1506 
      // Definitions of the register masks declared `extern` in the preceding
      // source_hpp block. They are default-constructed here and populated by
      // reg_mask_init().
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
 1525 static bool need_r12_heapbase() {
        // True exactly when UseCompressedOops is set. reg_mask_init() uses this to
        // decide whether r12 is removed from the allocatable register masks
        // (presumably because r12 then holds the heap base - confirm).
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
        // Populates the file-level RegMask globals. Each mask is produced by copying
        // an already-built mask and then removing individual registers, so the
        // sections below must run in this order.
        //
        // For the ANY/PTR/LONG masks every register is removed twice: once for
        // as_VMReg() and once for as_VMReg()->next(). The INT masks remove only the
        // first. Presumably the pair is the two halves of a 64-bit register - confirm.

        // r16..r31; stripped from the PTR and INT masks below when UseAPX is off.
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
        // rbp is withheld when PreserveFramePointer is set, r12 when
        // need_r12_heapbase() is true. Every PTR/LONG mask inherits these removals.
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
        // Pointer mask: the ANY mask minus rsp and r15, and minus r16..r31 unless
        // UseAPX is enabled.
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
        // Pointer mask widened with STACK_OR_STACK_SLOTS_mask().
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
        // Pointer-mask variants that additionally exclude the named register(s).
        // rbp is removed here unconditionally, regardless of PreserveFramePointer.
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
        // Built on the NO_RAX mask, so it excludes both rax and rbx.
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
        // The long mask is an exact copy of the pointer mask; its variants mirror
        // the pointer variants above.
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
        // The int mask starts from _ALL_INT_REG_mask, not from the ANY mask, so the
        // APX / rbp / r12 exclusions are applied again here, one removal per register.
        // rsp and r15 are not removed here (NOTE(review): presumably they are already
        // absent from _ALL_INT_REG_mask - confirm against the register class).
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
        // True when the CPU supports vzeroupper AND the compilation either has
        // max_vector_size() > 16 or has clear_upper_avx() set.
        // (The unit of max_vector_size() is presumably bytes - confirm.)
 1627   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
        // Number of bytes the call-offset and padding computations in this file must
        // reserve for a vzeroupper: 3 when generate_vzeroupper() holds for the current
        // compilation, 0 otherwise. The 3 has to match the instruction's encoded length.
 1631   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
      // Static Java call: 5 bytes for the call itself, plus clear_avx_size() bytes
      // when a vzeroupper is emitted as part of the call sequence.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
        // Dynamic Java call: 15 bytes for the call sequence, plus clear_avx_size()
        // bytes when a vzeroupper is emitted. (compute_padding() for the dynamic call
        // skips 11 bytes for "movq instruction + call opcode byte"; the remaining 4
        // are presumably the call's 32-bit operand - confirm.)
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
        // Runtime call. Two shapes:
        //   - no entry point (_entry_point == nullptr): a 3-byte register-indirect
        //     call; no vzeroupper bytes are added on this path.
        //   - otherwise: a 13-byte movq+callq sequence, plus clear_avx_size() bytes
        //     unless the node's ideal opcode is Op_CallLeafVector.
 1652   if (_entry_point == nullptr) {
 1653     // CallLeafNoFPInDirect
 1654     return 3; // callq (register)
 1655   }
 1656   int offset = 13; // movq r10,#addr; callq (r10)
 1657   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1658     offset += clear_avx_size();
 1659   }
 1660   return offset;
 1661 }
 1662 //
 1663 // Compute padding required for nodes which need alignment
 1664 //
 1665 
 1666 // The address of the call instruction needs to be 4-byte aligned to
 1667 // ensure that it does not span a cache line so that it can be patched.
      // Returns the number of padding bytes to insert at current_offset.
      // Note: the offset that actually gets aligned is the one just past the optional
      // vzeroupper and the 1-byte call opcode (presumably the start of the call's
      // 32-bit displacement - confirm). The alignment comes from alignment_required().
 1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1669 {
 1670   current_offset += clear_avx_size(); // skip vzeroupper
 1671   current_offset += 1; // skip call opcode byte
 1672   return align_up(current_offset, alignment_required()) - current_offset;
 1673 }
 1674 
 1675 // The address of the call instruction needs to be 4-byte aligned to
 1676 // ensure that it does not span a cache line so that it can be patched.
      // Returns the number of padding bytes to insert at current_offset.
      // Note: the offset that actually gets aligned is the one just past the optional
      // vzeroupper, the movq instruction and the call opcode byte (11 bytes in total;
      // presumably the start of the call's 32-bit displacement - confirm). The
      // alignment comes from alignment_required().
 1677 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1678 {
 1679   current_offset += clear_avx_size(); // skip vzeroupper
 1680   current_offset += 11; // skip movq instruction + call opcode byte
 1681   return align_up(current_offset, alignment_required()) - current_offset;
 1682 }
 1683 
 1684 // This could be in MacroAssembler but it's fairly C2 specific
 1685 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1686   Label exit;
 1687   __ jccb(Assembler::noParity, exit);
 1688   __ pushf();
 1689   //
 1690   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1691   // zero OF,AF,SF for NaN values.
 1692   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1693   // values returns 'less than' result (CF is set).
 1694   // Leave the rest of flags unchanged.
 1695   //
 1696   //    7 6 5 4 3 2 1 0
 1697   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1698   //    0 0 1 0 1 0 1 1   (0x2B)
 1699   //
 1700   __ andq(Address(rsp, 0), 0xffffff2b);
 1701   __ popf();
 1702   __ bind(exit);
 1703 }
 1704 
 1705 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1706   // If any floating point comparison instruction is used, unordered case always triggers jump
 1707   // for below condition, CF=1 is true when at least one input is NaN
 1708   Label done;
 1709   __ movl(dst, -1);
 1710   __ jcc(Assembler::below, done);
 1711   __ setcc(Assembler::notEqual, dst);
 1712   __ bind(done);
 1713 }
 1714 
 1715 enum FP_PREC {
 1716   fp_prec_hlf,
 1717   fp_prec_flt,
 1718   fp_prec_dbl
 1719 };
 1720 
 1721 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1722                                 XMMRegister p, XMMRegister q) {
 1723   if (pt == fp_prec_hlf) {
 1724     __ evucomish(p, q);
 1725   } else if (pt == fp_prec_flt) {
 1726     __ ucomiss(p, q);
 1727   } else {
 1728     __ ucomisd(p, q);
 1729   }
 1730 }
 1731 
 1732 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1733                          XMMRegister dst, XMMRegister src, Register scratch) {
 1734   if (pt == fp_prec_hlf) {
 1735     __ movhlf(dst, src, scratch);
 1736   } else if (pt == fp_prec_flt) {
 1737     __ movflt(dst, src);
 1738   } else {
 1739     __ movdbl(dst, src);
 1740   }
 1741 }
 1742 
 1743 // Math.min()          # Math.max()
 1744 // -----------------------------
 1745 // (v)ucomis[h/s/d]    #
 1746 // ja   -> b           # a
 1747 // jp   -> NaN         # NaN
 1748 // jb   -> a           # b
 1749 // je   -> a | b       # a & b
 1750 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1751                             XMMRegister a, XMMRegister b, Register rt,
 1752                             bool min, enum FP_PREC pt) {
 1753   Label nan, zero, below, above, done;
 1754 
 1755   emit_fp_ucom(masm, pt, a, b);
 1756 
 1757   if (dst->encoding() != (min ? b : a)->encoding()) {
 1758     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1759   } else {
 1760     __ jccb(Assembler::above, done);
 1761   }
 1762   __ jccb(Assembler::parity, nan);  // PF=1
 1763   __ jccb(Assembler::below, below); // CF=1
 1764 
 1765   // equal
 1766   // Using bitwise operations is a low cost way to compute the correct result
 1767   // for zero and non-zero inputs in this scenario except for NaN, which is
 1768   // handled separately. The mantissa and exponent are valid with either
 1769   // bitwise operation. For zero inputs, the sign bit is chosen according to
 1770   // whether a minimum or maximum value is required.
 1771   if (min) {
 1772     // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
 1773     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1774   } else {
 1775     // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
 1776     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1777   }
 1778   __ jmp(done);
 1779 
 1780   __ bind(above);
 1781   movfp(masm, pt, dst, min ? b : a, rt);
 1782   __ jmp(done);
 1783 
 1784   __ bind(nan);
 1785   if (pt == fp_prec_hlf) {
 1786     __ movl(rt, 0x00007e00); // Float16.NaN
 1787     __ evmovw(dst, rt);
 1788   } else if (pt == fp_prec_flt) {
 1789     __ movl(rt, 0x7fc00000); // Float.NaN
 1790     __ movdl(dst, rt);
 1791   } else {
 1792     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1793     __ movdq(dst, rt);
 1794   }
 1795   __ jmp(done);
 1796 
 1797   __ bind(below);
 1798   movfp(masm, pt, dst, min ? a : b, rt);
 1799 
 1800   __ bind(done);
 1801 }
 1802 
 1803 //=============================================================================
// The output register mask of MachConstantBaseNode is bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1805 
 1806 int ConstantTable::calculate_table_base_offset() const {
 1807   return 0;  // absolute addressing, no offset
 1808 }
 1809 
 1810 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1811 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1812   ShouldNotReachHere();
 1813 }
 1814 
 1815 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1816   // Empty encoding
 1817 }
 1818 
 1819 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1820   return 0;
 1821 }
 1822 
 1823 #ifndef PRODUCT
 1824 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1825   st->print("# MachConstantBaseNode (empty encoding)");
 1826 }
 1827 #endif
 1828 
 1829 
 1830 //=============================================================================
 1831 #ifndef PRODUCT
 1832 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1833   Compile* C = ra_->C;
 1834 
 1835   int framesize = C->output()->frame_size_in_bytes();
 1836   int bangsize = C->output()->bang_size_in_bytes();
 1837   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1838   // Remove wordSize for return addr which is already pushed.
 1839   framesize -= wordSize;
 1840 
 1841   if (C->output()->need_stack_bang(bangsize)) {
 1842     framesize -= wordSize;
 1843     st->print("# stack bang (%d bytes)", bangsize);
 1844     st->print("\n\t");
 1845     st->print("pushq   rbp\t# Save rbp");
 1846     if (PreserveFramePointer) {
 1847         st->print("\n\t");
 1848         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1849     }
 1850     if (framesize) {
 1851       st->print("\n\t");
 1852       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1853     }
 1854   } else {
 1855     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1856     st->print("\n\t");
 1857     framesize -= wordSize;
 1858     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1859     if (PreserveFramePointer) {
 1860       st->print("\n\t");
 1861       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1862       if (framesize > 0) {
 1863         st->print("\n\t");
 1864         st->print("addq    rbp, #%d", framesize);
 1865       }
 1866     }
 1867   }
 1868 
 1869   if (VerifyStackAtCalls) {
 1870     st->print("\n\t");
 1871     framesize -= wordSize;
 1872     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1873 #ifdef ASSERT
 1874     st->print("\n\t");
 1875     st->print("# stack alignment check");
 1876 #endif
 1877   }
 1878   if (C->stub_function() != nullptr) {
 1879     st->print("\n\t");
 1880     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1881     st->print("\n\t");
 1882     st->print("je      fast_entry\t");
 1883     st->print("\n\t");
 1884     st->print("call    #nmethod_entry_barrier_stub\t");
 1885     st->print("\n\tfast_entry:");
 1886   }
 1887   st->cr();
 1888 }
 1889 #endif
 1890 
 1891 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1892   Compile* C = ra_->C;
 1893 
 1894   __ verified_entry(C);
 1895 
 1896   if (ra_->C->stub_function() == nullptr) {
 1897     __ entry_barrier();
 1898   }
 1899 
 1900   if (!Compile::current()->output()->in_scratch_emit_size()) {
 1901     __ bind(*_verified_entry);
 1902   }
 1903 
 1904   C->output()->set_frame_complete(__ offset());
 1905 
 1906   if (C->has_mach_constant_base_node()) {
 1907     // NOTE: We set the table base offset here because users might be
 1908     // emitted before MachConstantBaseNode.
 1909     ConstantTable& constant_table = C->output()->constant_table();
 1910     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1911   }
 1912 }
 1913 
 1914 
 1915 int MachPrologNode::reloc() const
 1916 {
 1917   return 0; // a large enough number
 1918 }
 1919 
 1920 //=============================================================================
 1921 #ifndef PRODUCT
 1922 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1923 {
 1924   Compile* C = ra_->C;
 1925   if (generate_vzeroupper(C)) {
 1926     st->print("vzeroupper");
 1927     st->cr(); st->print("\t");
 1928   }
 1929 
 1930   int framesize = C->output()->frame_size_in_bytes();
 1931   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1932   // Remove word for return adr already pushed
 1933   // and RBP
 1934   framesize -= 2*wordSize;
 1935 
 1936   if (framesize) {
 1937     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1938     st->print("\t");
 1939   }
 1940 
 1941   st->print_cr("popq    rbp");
 1942   if (do_polling() && C->is_method_compilation()) {
 1943     st->print("\t");
 1944     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1945                  "ja      #safepoint_stub\t"
 1946                  "# Safepoint: poll for GC");
 1947   }
 1948 }
 1949 #endif
 1950 
 1951 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1952 {
 1953   Compile* C = ra_->C;
 1954 
 1955   if (generate_vzeroupper(C)) {
 1956     // Clear upper bits of YMM registers when current compiled code uses
 1957     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1958     __ vzeroupper();
 1959   }
 1960 
 1961   // Subtract two words to account for return address and rbp
 1962   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1963   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1964 
 1965   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1966     __ reserved_stack_check();
 1967   }
 1968 
 1969   if (do_polling() && C->is_method_compilation()) {
 1970     Label dummy_label;
 1971     Label* code_stub = &dummy_label;
 1972     if (!C->output()->in_scratch_emit_size()) {
 1973       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1974       C->output()->add_stub(stub);
 1975       code_stub = &stub->entry();
 1976     }
 1977     __ relocate(relocInfo::poll_return_type);
 1978     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1979   }
 1980 }
 1981 
 1982 int MachEpilogNode::reloc() const
 1983 {
 1984   return 2; // a large enough number
 1985 }
 1986 
 1987 const Pipeline* MachEpilogNode::pipeline() const
 1988 {
 1989   return MachNode::pipeline_class();
 1990 }
 1991 
 1992 //=============================================================================
 1993 
 1994 enum RC {
 1995   rc_bad,
 1996   rc_int,
 1997   rc_kreg,
 1998   rc_float,
 1999   rc_stack
 2000 };
 2001 
 2002 static enum RC rc_class(OptoReg::Name reg)
 2003 {
 2004   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 2005 
 2006   if (OptoReg::is_stack(reg)) return rc_stack;
 2007 
 2008   VMReg r = OptoReg::as_VMReg(reg);
 2009 
 2010   if (r->is_Register()) return rc_int;
 2011 
 2012   if (r->is_KRegister()) return rc_kreg;
 2013 
 2014   assert(r->is_XMMRegister(), "must be");
 2015   return rc_float;
 2016 }
 2017 
 2018 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2019 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2020                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2021 
 2022 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2023                      int stack_offset, int reg, uint ireg, outputStream* st);
 2024 
 2025 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2026                                       int dst_offset, uint ireg, outputStream* st) {
 2027   if (masm) {
 2028     switch (ireg) {
 2029     case Op_VecS:
 2030       __ movq(Address(rsp, -8), rax);
 2031       __ movl(rax, Address(rsp, src_offset));
 2032       __ movl(Address(rsp, dst_offset), rax);
 2033       __ movq(rax, Address(rsp, -8));
 2034       break;
 2035     case Op_VecD:
 2036       __ pushq(Address(rsp, src_offset));
 2037       __ popq (Address(rsp, dst_offset));
 2038       break;
 2039     case Op_VecX:
 2040       __ pushq(Address(rsp, src_offset));
 2041       __ popq (Address(rsp, dst_offset));
 2042       __ pushq(Address(rsp, src_offset+8));
 2043       __ popq (Address(rsp, dst_offset+8));
 2044       break;
 2045     case Op_VecY:
 2046       __ vmovdqu(Address(rsp, -32), xmm0);
 2047       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2048       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2049       __ vmovdqu(xmm0, Address(rsp, -32));
 2050       break;
 2051     case Op_VecZ:
 2052       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2053       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2054       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2055       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2056       break;
 2057     default:
 2058       ShouldNotReachHere();
 2059     }
 2060 #ifndef PRODUCT
 2061   } else {
 2062     switch (ireg) {
 2063     case Op_VecS:
 2064       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2065                 "movl    rax, [rsp + #%d]\n\t"
 2066                 "movl    [rsp + #%d], rax\n\t"
 2067                 "movq    rax, [rsp - #8]",
 2068                 src_offset, dst_offset);
 2069       break;
 2070     case Op_VecD:
 2071       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2072                 "popq    [rsp + #%d]",
 2073                 src_offset, dst_offset);
 2074       break;
 2075      case Op_VecX:
 2076       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2077                 "popq    [rsp + #%d]\n\t"
 2078                 "pushq   [rsp + #%d]\n\t"
 2079                 "popq    [rsp + #%d]",
 2080                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2081       break;
 2082     case Op_VecY:
 2083       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2084                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2085                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2086                 "vmovdqu xmm0, [rsp - #32]",
 2087                 src_offset, dst_offset);
 2088       break;
 2089     case Op_VecZ:
 2090       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2091                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2092                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2093                 "vmovdqu xmm0, [rsp - #64]",
 2094                 src_offset, dst_offset);
 2095       break;
 2096     default:
 2097       ShouldNotReachHere();
 2098     }
 2099 #endif
 2100   }
 2101 }
 2102 
 2103 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2104                                        PhaseRegAlloc* ra_,
 2105                                        bool do_size,
 2106                                        outputStream* st) const {
 2107   assert(masm != nullptr || st  != nullptr, "sanity");
 2108   // Get registers to move
 2109   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2110   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2111   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2112   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2113 
 2114   enum RC src_second_rc = rc_class(src_second);
 2115   enum RC src_first_rc = rc_class(src_first);
 2116   enum RC dst_second_rc = rc_class(dst_second);
 2117   enum RC dst_first_rc = rc_class(dst_first);
 2118 
 2119   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2120          "must move at least 1 register" );
 2121 
 2122   if (src_first == dst_first && src_second == dst_second) {
 2123     // Self copy, no move
 2124     return 0;
 2125   }
 2126   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
 2127     uint ireg = ideal_reg();
 2128     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2129     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 2130     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 2131       // mem -> mem
 2132       int src_offset = ra_->reg2offset(src_first);
 2133       int dst_offset = ra_->reg2offset(dst_first);
 2134       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 2135     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 2136       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 2137     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 2138       int stack_offset = ra_->reg2offset(dst_first);
 2139       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2140     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 2141       int stack_offset = ra_->reg2offset(src_first);
 2142       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2143     } else {
 2144       ShouldNotReachHere();
 2145     }
 2146     return 0;
 2147   }
 2148   if (src_first_rc == rc_stack) {
 2149     // mem ->
 2150     if (dst_first_rc == rc_stack) {
 2151       // mem -> mem
 2152       assert(src_second != dst_first, "overlap");
 2153       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2154           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2155         // 64-bit
 2156         int src_offset = ra_->reg2offset(src_first);
 2157         int dst_offset = ra_->reg2offset(dst_first);
 2158         if (masm) {
 2159           __ pushq(Address(rsp, src_offset));
 2160           __ popq (Address(rsp, dst_offset));
 2161 #ifndef PRODUCT
 2162         } else {
 2163           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2164                     "popq    [rsp + #%d]",
 2165                      src_offset, dst_offset);
 2166 #endif
 2167         }
 2168       } else {
 2169         // 32-bit
 2170         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2171         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2172         // No pushl/popl, so:
 2173         int src_offset = ra_->reg2offset(src_first);
 2174         int dst_offset = ra_->reg2offset(dst_first);
 2175         if (masm) {
 2176           __ movq(Address(rsp, -8), rax);
 2177           __ movl(rax, Address(rsp, src_offset));
 2178           __ movl(Address(rsp, dst_offset), rax);
 2179           __ movq(rax, Address(rsp, -8));
 2180 #ifndef PRODUCT
 2181         } else {
 2182           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2183                     "movl    rax, [rsp + #%d]\n\t"
 2184                     "movl    [rsp + #%d], rax\n\t"
 2185                     "movq    rax, [rsp - #8]",
 2186                      src_offset, dst_offset);
 2187 #endif
 2188         }
 2189       }
 2190       return 0;
 2191     } else if (dst_first_rc == rc_int) {
 2192       // mem -> gpr
 2193       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2194           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2195         // 64-bit
 2196         int offset = ra_->reg2offset(src_first);
 2197         if (masm) {
 2198           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2199 #ifndef PRODUCT
 2200         } else {
 2201           st->print("movq    %s, [rsp + #%d]\t# spill",
 2202                      Matcher::regName[dst_first],
 2203                      offset);
 2204 #endif
 2205         }
 2206       } else {
 2207         // 32-bit
 2208         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2209         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2210         int offset = ra_->reg2offset(src_first);
 2211         if (masm) {
 2212           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2213 #ifndef PRODUCT
 2214         } else {
 2215           st->print("movl    %s, [rsp + #%d]\t# spill",
 2216                      Matcher::regName[dst_first],
 2217                      offset);
 2218 #endif
 2219         }
 2220       }
 2221       return 0;
 2222     } else if (dst_first_rc == rc_float) {
 2223       // mem-> xmm
 2224       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2225           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2226         // 64-bit
 2227         int offset = ra_->reg2offset(src_first);
 2228         if (masm) {
 2229           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2230 #ifndef PRODUCT
 2231         } else {
 2232           st->print("%s  %s, [rsp + #%d]\t# spill",
 2233                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2234                      Matcher::regName[dst_first],
 2235                      offset);
 2236 #endif
 2237         }
 2238       } else {
 2239         // 32-bit
 2240         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2241         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2242         int offset = ra_->reg2offset(src_first);
 2243         if (masm) {
 2244           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2245 #ifndef PRODUCT
 2246         } else {
 2247           st->print("movss   %s, [rsp + #%d]\t# spill",
 2248                      Matcher::regName[dst_first],
 2249                      offset);
 2250 #endif
 2251         }
 2252       }
 2253       return 0;
 2254     } else if (dst_first_rc == rc_kreg) {
 2255       // mem -> kreg
 2256       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2257           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2258         // 64-bit
 2259         int offset = ra_->reg2offset(src_first);
 2260         if (masm) {
 2261           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2262 #ifndef PRODUCT
 2263         } else {
 2264           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2265                      Matcher::regName[dst_first],
 2266                      offset);
 2267 #endif
 2268         }
 2269       }
 2270       return 0;
 2271     }
 2272   } else if (src_first_rc == rc_int) {
 2273     // gpr ->
 2274     if (dst_first_rc == rc_stack) {
 2275       // gpr -> mem
 2276       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2277           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2278         // 64-bit
 2279         int offset = ra_->reg2offset(dst_first);
 2280         if (masm) {
 2281           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2282 #ifndef PRODUCT
 2283         } else {
 2284           st->print("movq    [rsp + #%d], %s\t# spill",
 2285                      offset,
 2286                      Matcher::regName[src_first]);
 2287 #endif
 2288         }
 2289       } else {
 2290         // 32-bit
 2291         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2292         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2293         int offset = ra_->reg2offset(dst_first);
 2294         if (masm) {
 2295           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2296 #ifndef PRODUCT
 2297         } else {
 2298           st->print("movl    [rsp + #%d], %s\t# spill",
 2299                      offset,
 2300                      Matcher::regName[src_first]);
 2301 #endif
 2302         }
 2303       }
 2304       return 0;
 2305     } else if (dst_first_rc == rc_int) {
 2306       // gpr -> gpr
 2307       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2308           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2309         // 64-bit
 2310         if (masm) {
 2311           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2312                   as_Register(Matcher::_regEncode[src_first]));
 2313 #ifndef PRODUCT
 2314         } else {
 2315           st->print("movq    %s, %s\t# spill",
 2316                      Matcher::regName[dst_first],
 2317                      Matcher::regName[src_first]);
 2318 #endif
 2319         }
 2320         return 0;
 2321       } else {
 2322         // 32-bit
 2323         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2324         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2325         if (masm) {
 2326           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2327                   as_Register(Matcher::_regEncode[src_first]));
 2328 #ifndef PRODUCT
 2329         } else {
 2330           st->print("movl    %s, %s\t# spill",
 2331                      Matcher::regName[dst_first],
 2332                      Matcher::regName[src_first]);
 2333 #endif
 2334         }
 2335         return 0;
 2336       }
 2337     } else if (dst_first_rc == rc_float) {
 2338       // gpr -> xmm
 2339       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2340           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2341         // 64-bit
 2342         if (masm) {
 2343           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2344 #ifndef PRODUCT
 2345         } else {
 2346           st->print("movdq   %s, %s\t# spill",
 2347                      Matcher::regName[dst_first],
 2348                      Matcher::regName[src_first]);
 2349 #endif
 2350         }
 2351       } else {
 2352         // 32-bit
 2353         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2354         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2355         if (masm) {
 2356           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2357 #ifndef PRODUCT
 2358         } else {
 2359           st->print("movdl   %s, %s\t# spill",
 2360                      Matcher::regName[dst_first],
 2361                      Matcher::regName[src_first]);
 2362 #endif
 2363         }
 2364       }
 2365       return 0;
 2366     } else if (dst_first_rc == rc_kreg) {
 2367       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2368           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2369         // 64-bit
 2370         if (masm) {
 2371           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2372   #ifndef PRODUCT
 2373         } else {
 2374            st->print("kmovq   %s, %s\t# spill",
 2375                        Matcher::regName[dst_first],
 2376                        Matcher::regName[src_first]);
 2377   #endif
 2378         }
 2379       }
 2380       Unimplemented();
 2381       return 0;
 2382     }
 2383   } else if (src_first_rc == rc_float) {
 2384     // xmm ->
 2385     if (dst_first_rc == rc_stack) {
 2386       // xmm -> mem
 2387       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2388           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2389         // 64-bit
 2390         int offset = ra_->reg2offset(dst_first);
 2391         if (masm) {
 2392           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2393 #ifndef PRODUCT
 2394         } else {
 2395           st->print("movsd   [rsp + #%d], %s\t# spill",
 2396                      offset,
 2397                      Matcher::regName[src_first]);
 2398 #endif
 2399         }
 2400       } else {
 2401         // 32-bit
 2402         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2403         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2404         int offset = ra_->reg2offset(dst_first);
 2405         if (masm) {
 2406           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2407 #ifndef PRODUCT
 2408         } else {
 2409           st->print("movss   [rsp + #%d], %s\t# spill",
 2410                      offset,
 2411                      Matcher::regName[src_first]);
 2412 #endif
 2413         }
 2414       }
 2415       return 0;
 2416     } else if (dst_first_rc == rc_int) {
 2417       // xmm -> gpr
 2418       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2419           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2420         // 64-bit
 2421         if (masm) {
 2422           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2423 #ifndef PRODUCT
 2424         } else {
 2425           st->print("movdq   %s, %s\t# spill",
 2426                      Matcher::regName[dst_first],
 2427                      Matcher::regName[src_first]);
 2428 #endif
 2429         }
 2430       } else {
 2431         // 32-bit
 2432         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2433         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2434         if (masm) {
 2435           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2436 #ifndef PRODUCT
 2437         } else {
 2438           st->print("movdl   %s, %s\t# spill",
 2439                      Matcher::regName[dst_first],
 2440                      Matcher::regName[src_first]);
 2441 #endif
 2442         }
 2443       }
 2444       return 0;
 2445     } else if (dst_first_rc == rc_float) {
 2446       // xmm -> xmm
 2447       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2448           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2449         // 64-bit
 2450         if (masm) {
 2451           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2452 #ifndef PRODUCT
 2453         } else {
 2454           st->print("%s  %s, %s\t# spill",
 2455                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2456                      Matcher::regName[dst_first],
 2457                      Matcher::regName[src_first]);
 2458 #endif
 2459         }
 2460       } else {
 2461         // 32-bit
 2462         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2463         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2464         if (masm) {
 2465           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2466 #ifndef PRODUCT
 2467         } else {
 2468           st->print("%s  %s, %s\t# spill",
 2469                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2470                      Matcher::regName[dst_first],
 2471                      Matcher::regName[src_first]);
 2472 #endif
 2473         }
 2474       }
 2475       return 0;
 2476     } else if (dst_first_rc == rc_kreg) {
 2477       assert(false, "Illegal spilling");
 2478       return 0;
 2479     }
 2480   } else if (src_first_rc == rc_kreg) {
 2481     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2483       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2484           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2485         // 64-bit
 2486         int offset = ra_->reg2offset(dst_first);
 2487         if (masm) {
 2488           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2489 #ifndef PRODUCT
 2490         } else {
 2491           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2492                      offset,
 2493                      Matcher::regName[src_first]);
 2494 #endif
 2495         }
 2496       }
 2497       return 0;
 2498     } else if (dst_first_rc == rc_int) {
 2499       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2500           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2501         // 64-bit
 2502         if (masm) {
 2503           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2504 #ifndef PRODUCT
 2505         } else {
 2506          st->print("kmovq   %s, %s\t# spill",
 2507                      Matcher::regName[dst_first],
 2508                      Matcher::regName[src_first]);
 2509 #endif
 2510         }
 2511       }
 2512       Unimplemented();
 2513       return 0;
 2514     } else if (dst_first_rc == rc_kreg) {
 2515       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2516           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2517         // 64-bit
 2518         if (masm) {
 2519           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2520 #ifndef PRODUCT
 2521         } else {
 2522          st->print("kmovq   %s, %s\t# spill",
 2523                      Matcher::regName[dst_first],
 2524                      Matcher::regName[src_first]);
 2525 #endif
 2526         }
 2527       }
 2528       return 0;
 2529     } else if (dst_first_rc == rc_float) {
 2530       assert(false, "Illegal spill");
 2531       return 0;
 2532     }
 2533   }
 2534 
 2535   assert(0," foo ");
 2536   Unimplemented();
 2537   return 0;
 2538 }
 2539 
 2540 #ifndef PRODUCT
 2541 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2542   implementation(nullptr, ra_, false, st);
 2543 }
 2544 #endif
 2545 
 2546 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2547   implementation(masm, ra_, false, nullptr);
 2548 }
 2549 
 2550 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2551   return MachNode::size(ra_);
 2552 }
 2553 
 2554 //=============================================================================
 2555 #ifndef PRODUCT
 2556 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2557 {
 2558   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2559   int reg = ra_->get_reg_first(this);
 2560   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2561             Matcher::regName[reg], offset);
 2562 }
 2563 #endif
 2564 
 2565 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2566 {
 2567   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2568   int reg = ra_->get_encode(this);
 2569 
 2570   __ lea(as_Register(reg), Address(rsp, offset));
 2571 }
 2572 
 2573 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2574 {
 2575   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2576   if (ra_->get_encode(this) > 15) {
 2577     return (offset < 0x80) ? 6 : 9; // REX2
 2578   } else {
 2579     return (offset < 0x80) ? 5 : 8; // REX
 2580   }
 2581 }
 2582 
 2583 //=============================================================================
 2584 #ifndef PRODUCT
 2585 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2586 {
 2587   st->print_cr("MachVEPNode");
 2588 }
 2589 #endif
 2590 
 2591 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2592 {
 2593   CodeBuffer* cbuf = masm->code();
 2594   if (!_verified) {
 2595     __ ic_check(1);
 2596   } else {
 2597     if (ra_->C->stub_function() == nullptr) {
 2598       // Emit the entry barrier in a temporary frame before unpacking because
 2599       // it can deopt, which would require packing the scalarized args again.
 2600       __ verified_entry(ra_->C, 0);
 2601       __ entry_barrier();
 2602       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 2603       __ remove_frame(initial_framesize, false);
 2604     }
 2605     // Unpack inline type args passed as oop and then jump to
 2606     // the verified entry point (skipping the unverified entry).
 2607     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 2608     // Emit code for verified entry and save increment for stack repair on return
 2609     __ verified_entry(ra_->C, sp_inc);
 2610     if (Compile::current()->output()->in_scratch_emit_size()) {
 2611       Label dummy_verified_entry;
 2612       __ jmp(dummy_verified_entry);
 2613     } else {
 2614       __ jmp(*_verified_entry);
 2615     }
 2616   }
 2617   if (ra_->C->stub_function() == nullptr) {
 2618     // Pad so that the next call to MachVEPNode::emit() starts out with the
 2619     // correct alignment.  This is needed by entry_barrier() to align the
 2620     // compare.  But unfortunately we need to align all 4 MachVEPNodes because
 2621     // entry point offsets are computed using scratch_emit_size(), so starting
 2622     // alignment must match the alignment of the scratch buffer, otherwise the sizes
 2623     // will be off.
 2624     __ align(4);
 2625   }
 2626 }
 2627 
 2628 //=============================================================================
 2629 #ifndef PRODUCT
 2630 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2631 {
 2632   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2633   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2634   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2635 }
 2636 #endif
 2637 
 2638 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2639 {
 2640   __ ic_check(InteriorEntryAlignment);
 2641 }
 2642 
 2643 
 2644 //=============================================================================
 2645 
 2646 bool Matcher::supports_vector_calling_convention(void) {
 2647   return EnableVectorSupport;
 2648 }
 2649 
 2650 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2651   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2652 }
 2653 
 2654 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2655   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2656 }
 2657 
 2658 #ifdef ASSERT
 2659 static bool is_ndd_demotable(const MachNode* mdef) {
 2660   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2661 }
 2662 #endif
 2663 
 2664 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2665                                             int oper_index) {
 2666   if (mdef == nullptr) {
 2667     return false;
 2668   }
 2669 
 2670   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2671       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2672     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2673     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2674     return false;
 2675   }
 2676 
 2677   // Complex memory operand covers multiple incoming edges needed for
 2678   // address computation. Biasing def towards any address component will not
 2679   // result in NDD demotion by assembler.
 2680   if (mdef->operand_num_edges(oper_index) != 1) {
 2681     return false;
 2682   }
 2683 
 2684   // Demotion candidate must be register mask compatible with definition.
 2685   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2686   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2687     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2688     return false;
 2689   }
 2690 
 2691   switch (oper_index) {
 2692   // First operand of MachNode corresponding to Intel APX NDD selection
 2693   // pattern can share its assigned register with definition operand if
 2694   // their live ranges do not overlap. In such a scenario we can demote
 2695   // it to legacy map0/map1 instruction by replacing its 4-byte extended
 2696   // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
 2697   // are decorated with a special flag by instruction selector.
 2698   case 1:
 2699     return is_ndd_demotable_opr1(mdef);
 2700 
 2701   // Definition operand of commutative operation can be biased towards second
 2702   // operand.
 2703   case 2:
 2704     return is_ndd_demotable_opr2(mdef);
 2705 
 2706   // Current scheme only selects up to two biasing candidates
 2707   default:
 2708     assert(false, "unhandled operand index: %s", mdef->Name());
 2709     break;
 2710   }
 2711 
 2712   return false;
 2713 }
 2714 
 2715 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2716   assert(EnableVectorSupport, "sanity");
 2717   int lo = XMM0_num;
 2718   int hi = XMM0b_num;
 2719   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2720   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2721   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2722   return OptoRegPair(hi, lo);
 2723 }
 2724 
 2725 // Is this branch offset short enough that a short branch can be used?
 2726 //
 2727 // NOTE: If the platform does not provide any short branch variants, then
 2728 //       this method should return false for offset 0.
 2729 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2730   // The passed offset is relative to address of the branch.
 2731   // On 86 a branch displacement is calculated relative to address
 2732   // of a next instruction.
 2733   offset -= br_size;
 2734 
 2735   // the short version of jmpConUCF2 contains multiple branches,
 2736   // making the reach slightly less
 2737   if (rule == jmpConUCF2_rule)
 2738     return (-126 <= offset && offset <= 125);
 2739   return (-128 <= offset && offset <= 127);
 2740 }
 2741 
 2742 #ifdef ASSERT
 2743 // Return whether or not this register is ever used as an argument.
 2744 bool Matcher::can_be_java_arg(int reg)
 2745 {
 2746   return
 2747     reg ==  RDI_num || reg == RDI_H_num ||
 2748     reg ==  RSI_num || reg == RSI_H_num ||
 2749     reg ==  RDX_num || reg == RDX_H_num ||
 2750     reg ==  RCX_num || reg == RCX_H_num ||
 2751     reg ==   R8_num || reg ==  R8_H_num ||
 2752     reg ==   R9_num || reg ==  R9_H_num ||
 2753     reg ==  R12_num || reg == R12_H_num ||
 2754     reg == XMM0_num || reg == XMM0b_num ||
 2755     reg == XMM1_num || reg == XMM1b_num ||
 2756     reg == XMM2_num || reg == XMM2b_num ||
 2757     reg == XMM3_num || reg == XMM3b_num ||
 2758     reg == XMM4_num || reg == XMM4b_num ||
 2759     reg == XMM5_num || reg == XMM5b_num ||
 2760     reg == XMM6_num || reg == XMM6b_num ||
 2761     reg == XMM7_num || reg == XMM7b_num;
 2762 }
 2763 #endif
 2764 
 2765 uint Matcher::int_pressure_limit()
 2766 {
 2767   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2768 }
 2769 
 2770 uint Matcher::float_pressure_limit()
 2771 {
 2772   // After experiment around with different values, the following default threshold
 2773   // works best for LCM's register pressure scheduling on x64.
 2774   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2775   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2776   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2777 }
 2778 
 2779 // Register for DIVI projection of divmodI
 2780 const RegMask& Matcher::divI_proj_mask() {
 2781   return INT_RAX_REG_mask();
 2782 }
 2783 
 2784 // Register for MODI projection of divmodI
 2785 const RegMask& Matcher::modI_proj_mask() {
 2786   return INT_RDX_REG_mask();
 2787 }
 2788 
 2789 // Register for DIVL projection of divmodL
 2790 const RegMask& Matcher::divL_proj_mask() {
 2791   return LONG_RAX_REG_mask();
 2792 }
 2793 
 2794 // Register for MODL projection of divmodL
 2795 const RegMask& Matcher::modL_proj_mask() {
 2796   return LONG_RDX_REG_mask();
 2797 }
 2798 
 2799 %}
 2800 
 2801 source_hpp %{
 2802 // Header information of the source block.
 2803 // Method declarations/definitions which are used outside
 2804 // the ad-scope can conveniently be defined here.
 2805 //
 2806 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
 2808 
 2809 #include "runtime/vm_version.hpp"
 2810 
 2811 class NativeJump;
 2812 
 2813 class CallStubImpl {
 2814 
 2815   //--------------------------------------------------------------
 2816   //---<  Used for optimization in Compile::shorten_branches  >---
 2817   //--------------------------------------------------------------
 2818 
 2819  public:
 2820   // Size of call trampoline stub.
 2821   static uint size_call_trampoline() {
 2822     return 0; // no call trampolines on this platform
 2823   }
 2824 
 2825   // number of relocations needed by a call trampoline stub
 2826   static uint reloc_call_trampoline() {
 2827     return 0; // no call trampolines on this platform
 2828   }
 2829 };
 2830 
 2831 class HandlerImpl {
 2832 
 2833  public:
 2834 
 2835   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2836 
 2837   static uint size_deopt_handler() {
 2838     // one call and one jmp.
 2839     return 7;
 2840   }
 2841 };
 2842 
 2843 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2844   switch(bytes) {
 2845     case  4: // fall-through
 2846     case  8: // fall-through
 2847     case 16: return Assembler::AVX_128bit;
 2848     case 32: return Assembler::AVX_256bit;
 2849     case 64: return Assembler::AVX_512bit;
 2850 
 2851     default: {
 2852       ShouldNotReachHere();
 2853       return Assembler::AVX_NoVec;
 2854     }
 2855   }
 2856 }
 2857 
 2858 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2859   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2860 }
 2861 
 2862 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2863   uint def_idx = use->operand_index(opnd);
 2864   Node* def = use->in(def_idx);
 2865   return vector_length_encoding(def);
 2866 }
 2867 
 2868 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2869   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2870          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2871 }
 2872 
 2873 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2874   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2875            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2876 }
 2877 
// Platform-dependent node flags for x86. Each flag occupies its own bit,
// allocated consecutively above the shared Node::_last_flag bit.
class Node::PD {
public:
  enum NodeFlags : uint64_t {
    // Tested by MachNode::compute_padding() to decide whether Intel JCC
    // erratum padding has to be computed for the node.
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    // Condition-code effects of an instruction, one bit per flag that is set.
    // NOTE(review): meaning inferred from the names; confirm against the users.
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    // Condition-code effects of an instruction, one bit per flag that is cleared.
    // NOTE(review): meaning inferred from the names; confirm against the users.
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    // NDD demotion candidates via the first / second operand; consulted by
    // Matcher::is_register_biasing_candidate().
    Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
    Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
    // Highest platform-dependent flag; must track the last entry above.
    _last_flag                = Flag_ndd_demotable_opr2
  };
};
 2897 
 2898 %} // end source_hpp
 2899 
 2900 source %{
 2901 
 2902 #include "opto/addnode.hpp"
 2903 #include "c2_intelJccErratum_x86.hpp"
 2904 
 2905 void PhaseOutput::pd_perform_mach_node_analysis() {
 2906   if (VM_Version::has_intel_jcc_erratum()) {
 2907     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2908     _buf_sizes._code += extra_padding;
 2909   }
 2910 }
 2911 
 2912 int MachNode::pd_alignment_required() const {
 2913   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2914     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2915     return IntelJccErratum::largest_jcc_size() + 1;
 2916   } else {
 2917     return 1;
 2918   }
 2919 }
 2920 
 2921 int MachNode::compute_padding(int current_offset) const {
 2922   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2923     Compile* C = Compile::current();
 2924     PhaseOutput* output = C->output();
 2925     Block* block = output->block();
 2926     int index = output->index();
 2927     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2928   } else {
 2929     return 0;
 2930   }
 2931 }
 2932 
 2933 // Emit deopt handler code.
 2934 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2935 
 2936   // Note that the code buffer's insts_mark is always relative to insts.
 2937   // That's why we must use the macroassembler to generate a handler.
 2938   address base = __ start_a_stub(size_deopt_handler());
 2939   if (base == nullptr) {
 2940     ciEnv::current()->record_failure("CodeCache is full");
 2941     return 0;  // CodeBuffer::expand failed
 2942   }
 2943   int offset = __ offset();
 2944 
 2945   Label start;
 2946   __ bind(start);
 2947 
 2948   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2949 
 2950   int entry_offset = __ offset();
 2951 
 2952   __ jmp(start);
 2953 
 2954   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2955   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2956          "out of bounds read in post-call NOP check");
 2957   __ end_a_stub();
 2958   return entry_offset;
 2959 }
 2960 
 2961 static Assembler::Width widthForType(BasicType bt) {
 2962   if (bt == T_BYTE) {
 2963     return Assembler::B;
 2964   } else if (bt == T_SHORT) {
 2965     return Assembler::W;
 2966   } else if (bt == T_INT) {
 2967     return Assembler::D;
 2968   } else {
 2969     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2970     return Assembler::Q;
 2971   }
 2972 }
 2973 
 2974 //=============================================================================
 2975 
 2976   // Float masks come from different places depending on platform.
 2977   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2978   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2979   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2980   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2981   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2982   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2983   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2984   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2985   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2986   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2987   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2988   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2989   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2990   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2991   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2992   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2993   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2994   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2995   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2996 
 2997 //=============================================================================
 2998 bool Matcher::match_rule_supported(int opcode) {
 2999   if (!has_match_rule(opcode)) {
 3000     return false; // no match rule present
 3001   }
 3002   switch (opcode) {
 3003     case Op_AbsVL:
 3004     case Op_StoreVectorScatter:
 3005       if (UseAVX < 3) {
 3006         return false;
 3007       }
 3008       break;
 3009     case Op_PopCountI:
 3010     case Op_PopCountL:
 3011       if (!UsePopCountInstruction) {
 3012         return false;
 3013       }
 3014       break;
 3015     case Op_PopCountVI:
 3016       if (UseAVX < 2) {
 3017         return false;
 3018       }
 3019       break;
 3020     case Op_CompressV:
 3021     case Op_ExpandV:
 3022     case Op_PopCountVL:
 3023       if (UseAVX < 2) {
 3024         return false;
 3025       }
 3026       break;
 3027     case Op_MulVI:
 3028       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3029         return false;
 3030       }
 3031       break;
 3032     case Op_MulVL:
 3033       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3034         return false;
 3035       }
 3036       break;
 3037     case Op_MulReductionVL:
 3038       if (VM_Version::supports_avx512dq() == false) {
 3039         return false;
 3040       }
 3041       break;
 3042     case Op_AbsVB:
 3043     case Op_AbsVS:
 3044     case Op_AbsVI:
 3045     case Op_AddReductionVI:
 3046     case Op_AndReductionV:
 3047     case Op_OrReductionV:
 3048     case Op_XorReductionV:
 3049       if (UseSSE < 3) { // requires at least SSSE3
 3050         return false;
 3051       }
 3052       break;
 3053     case Op_MaxHF:
 3054     case Op_MinHF:
 3055       if (!VM_Version::supports_avx512vlbw()) {
 3056         return false;
 3057       }  // fallthrough
 3058     case Op_AddHF:
 3059     case Op_DivHF:
 3060     case Op_FmaHF:
 3061     case Op_MulHF:
 3062     case Op_ReinterpretS2HF:
 3063     case Op_ReinterpretHF2S:
 3064     case Op_SubHF:
 3065     case Op_SqrtHF:
 3066       if (!VM_Version::supports_avx512_fp16()) {
 3067         return false;
 3068       }
 3069       break;
 3070     case Op_VectorLoadShuffle:
 3071     case Op_VectorRearrange:
 3072     case Op_MulReductionVI:
 3073       if (UseSSE < 4) { // requires at least SSE4
 3074         return false;
 3075       }
 3076       break;
 3077     case Op_IsInfiniteF:
 3078     case Op_IsInfiniteD:
 3079       if (!VM_Version::supports_avx512dq()) {
 3080         return false;
 3081       }
 3082       break;
 3083     case Op_SqrtVD:
 3084     case Op_SqrtVF:
 3085     case Op_VectorMaskCmp:
 3086     case Op_VectorCastB2X:
 3087     case Op_VectorCastS2X:
 3088     case Op_VectorCastI2X:
 3089     case Op_VectorCastL2X:
 3090     case Op_VectorCastF2X:
 3091     case Op_VectorCastD2X:
 3092     case Op_VectorUCastB2X:
 3093     case Op_VectorUCastS2X:
 3094     case Op_VectorUCastI2X:
 3095     case Op_VectorMaskCast:
 3096       if (UseAVX < 1) { // enabled for AVX only
 3097         return false;
 3098       }
 3099       break;
 3100     case Op_PopulateIndex:
 3101       if (UseAVX < 2) {
 3102         return false;
 3103       }
 3104       break;
 3105     case Op_RoundVF:
 3106       if (UseAVX < 2) { // enabled for AVX2 only
 3107         return false;
 3108       }
 3109       break;
 3110     case Op_RoundVD:
 3111       if (UseAVX < 3) {
 3112         return false;  // enabled for AVX3 only
 3113       }
 3114       break;
 3115     case Op_CompareAndSwapL:
 3116     case Op_CompareAndSwapP:
 3117       break;
 3118     case Op_StrIndexOf:
 3119       if (!UseSSE42Intrinsics) {
 3120         return false;
 3121       }
 3122       break;
 3123     case Op_StrIndexOfChar:
 3124       if (!UseSSE42Intrinsics) {
 3125         return false;
 3126       }
 3127       break;
 3128     case Op_OnSpinWait:
 3129       if (VM_Version::supports_on_spin_wait() == false) {
 3130         return false;
 3131       }
 3132       break;
 3133     case Op_MulVB:
 3134     case Op_LShiftVB:
 3135     case Op_RShiftVB:
 3136     case Op_URShiftVB:
 3137     case Op_VectorInsert:
 3138     case Op_VectorLoadMask:
 3139     case Op_VectorStoreMask:
 3140     case Op_VectorBlend:
 3141       if (UseSSE < 4) {
 3142         return false;
 3143       }
 3144       break;
 3145     case Op_MaxD:
 3146     case Op_MaxF:
 3147     case Op_MinD:
 3148     case Op_MinF:
 3149       if (UseAVX < 1) { // enabled for AVX only
 3150         return false;
 3151       }
 3152       break;
 3153     case Op_CacheWB:
 3154     case Op_CacheWBPreSync:
 3155     case Op_CacheWBPostSync:
 3156       if (!VM_Version::supports_data_cache_line_flush()) {
 3157         return false;
 3158       }
 3159       break;
 3160     case Op_ExtractB:
 3161     case Op_ExtractL:
 3162     case Op_ExtractI:
 3163     case Op_RoundDoubleMode:
 3164       if (UseSSE < 4) {
 3165         return false;
 3166       }
 3167       break;
 3168     case Op_RoundDoubleModeV:
 3169       if (VM_Version::supports_avx() == false) {
 3170         return false; // 128bit vroundpd is not available
 3171       }
 3172       break;
 3173     case Op_LoadVectorGather:
 3174     case Op_LoadVectorGatherMasked:
 3175       if (UseAVX < 2) {
 3176         return false;
 3177       }
 3178       break;
 3179     case Op_FmaF:
 3180     case Op_FmaD:
 3181     case Op_FmaVD:
 3182     case Op_FmaVF:
 3183       if (!UseFMA) {
 3184         return false;
 3185       }
 3186       break;
 3187     case Op_MacroLogicV:
 3188       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3189         return false;
 3190       }
 3191       break;
 3192 
 3193     case Op_VectorCmpMasked:
 3194     case Op_VectorMaskGen:
 3195       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3196         return false;
 3197       }
 3198       break;
 3199     case Op_VectorMaskFirstTrue:
 3200     case Op_VectorMaskLastTrue:
 3201     case Op_VectorMaskTrueCount:
 3202     case Op_VectorMaskToLong:
 3203       if (UseAVX < 1) {
 3204          return false;
 3205       }
 3206       break;
 3207     case Op_RoundF:
 3208     case Op_RoundD:
 3209       break;
 3210     case Op_CopySignD:
 3211     case Op_CopySignF:
 3212       if (UseAVX < 3)  {
 3213         return false;
 3214       }
 3215       if (!VM_Version::supports_avx512vl()) {
 3216         return false;
 3217       }
 3218       break;
 3219     case Op_CompressBits:
 3220     case Op_ExpandBits:
 3221       if (!VM_Version::supports_bmi2()) {
 3222         return false;
 3223       }
 3224       break;
 3225     case Op_CompressM:
 3226       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3227         return false;
 3228       }
 3229       break;
 3230     case Op_ConvF2HF:
 3231     case Op_ConvHF2F:
 3232       if (!VM_Version::supports_float16()) {
 3233         return false;
 3234       }
 3235       break;
 3236     case Op_VectorCastF2HF:
 3237     case Op_VectorCastHF2F:
 3238       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3239         return false;
 3240       }
 3241       break;
 3242   }
 3243   return true;  // Match rules are supported by default.
 3244 }
 3245 
 3246 //------------------------------------------------------------------------
 3247 
 3248 static inline bool is_pop_count_instr_target(BasicType bt) {
 3249   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3250          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3251 }
 3252 
 3253 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3254   return match_rule_supported_vector(opcode, vlen, bt);
 3255 }
 3256 
 3257 // Identify extra cases that we might want to provide match rules for vector nodes and
 3258 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3259 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3260   if (!match_rule_supported(opcode)) {
 3261     return false;
 3262   }
 3263   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3264   //   * SSE2 supports 128bit vectors for all types;
 3265   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3266   //   * AVX2 supports 256bit vectors for all types;
 3267   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3268   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3269   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3270   // And MaxVectorSize is taken into account as well.
 3271   if (!vector_size_supported(bt, vlen)) {
 3272     return false;
 3273   }
 3274   // Special cases which require vector length follow:
 3275   //   * implementation limitations
 3276   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3277   //   * 128bit vroundpd instruction is present only in AVX1
 3278   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3279   switch (opcode) {
 3280     case Op_MaxVHF:
 3281     case Op_MinVHF:
 3282       if (!VM_Version::supports_avx512bw()) {
 3283         return false;
 3284       }
 3285     case Op_AddVHF:
 3286     case Op_DivVHF:
 3287     case Op_FmaVHF:
 3288     case Op_MulVHF:
 3289     case Op_SubVHF:
 3290     case Op_SqrtVHF:
 3291       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3292         return false;
 3293       }
 3294       if (!VM_Version::supports_avx512_fp16()) {
 3295         return false;
 3296       }
 3297       break;
 3298     case Op_AbsVF:
 3299     case Op_NegVF:
 3300       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3301         return false; // 512bit vandps and vxorps are not available
 3302       }
 3303       break;
 3304     case Op_AbsVD:
 3305     case Op_NegVD:
 3306       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3307         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3308       }
 3309       break;
 3310     case Op_RotateRightV:
 3311     case Op_RotateLeftV:
 3312       if (bt != T_INT && bt != T_LONG) {
 3313         return false;
 3314       } // fallthrough
 3315     case Op_MacroLogicV:
 3316       if (!VM_Version::supports_evex() ||
 3317           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3318         return false;
 3319       }
 3320       break;
 3321     case Op_ClearArray:
 3322     case Op_VectorMaskGen:
 3323     case Op_VectorCmpMasked:
 3324       if (!VM_Version::supports_avx512bw()) {
 3325         return false;
 3326       }
 3327       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_LoadVectorMasked:
 3332     case Op_StoreVectorMasked:
 3333       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3334         return false;
 3335       }
 3336       break;
 3337     case Op_UMinV:
 3338     case Op_UMaxV:
 3339       if (UseAVX == 0) {
 3340         return false;
 3341       }
 3342       break;
 3343     case Op_UMinReductionV:
 3344     case Op_UMaxReductionV:
 3345       if (UseAVX == 0) {
 3346         return false;
 3347       }
 3348       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3349         return false;
 3350       }
 3351       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3352         return false;
 3353       }
 3354       break;
 3355     case Op_MaxV:
 3356     case Op_MinV:
 3357       if (UseSSE < 4 && is_integral_type(bt)) {
 3358         return false;
 3359       }
 3360       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3361           // Float/Double intrinsics are enabled for AVX family currently.
 3362           if (UseAVX == 0) {
 3363             return false;
 3364           }
 3365           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3366             return false;
 3367           }
 3368       }
 3369       break;
 3370     case Op_CallLeafVector:
 3371       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3372         return false;
 3373       }
 3374       break;
 3375     case Op_AddReductionVI:
 3376       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3377         return false;
 3378       }
 3379       // fallthrough
 3380     case Op_AndReductionV:
 3381     case Op_OrReductionV:
 3382     case Op_XorReductionV:
 3383       if (is_subword_type(bt) && (UseSSE < 4)) {
 3384         return false;
 3385       }
 3386       break;
 3387     case Op_MinReductionV:
 3388     case Op_MaxReductionV:
 3389       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3390         return false;
 3391       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3392         return false;
 3393       }
 3394       // Float/Double intrinsics enabled for AVX family.
 3395       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3396         return false;
 3397       }
 3398       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3399         return false;
 3400       }
 3401       break;
 3402     case Op_VectorBlend:
 3403       if (UseAVX == 0 && size_in_bits < 128) {
 3404         return false;
 3405       }
 3406       break;
 3407     case Op_VectorTest:
 3408       if (UseSSE < 4) {
 3409         return false; // Implementation limitation
 3410       } else if (size_in_bits < 32) {
 3411         return false; // Implementation limitation
 3412       }
 3413       break;
 3414     case Op_VectorLoadShuffle:
 3415     case Op_VectorRearrange:
 3416       if(vlen == 2) {
 3417         return false; // Implementation limitation due to how shuffle is loaded
 3418       } else if (size_in_bits == 256 && UseAVX < 2) {
 3419         return false; // Implementation limitation
 3420       }
 3421       break;
 3422     case Op_VectorLoadMask:
 3423     case Op_VectorMaskCast:
 3424       if (size_in_bits == 256 && UseAVX < 2) {
 3425         return false; // Implementation limitation
 3426       }
 3427       // fallthrough
 3428     case Op_VectorStoreMask:
 3429       if (vlen == 2) {
 3430         return false; // Implementation limitation
 3431       }
 3432       break;
 3433     case Op_PopulateIndex:
 3434       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3435         return false;
 3436       }
 3437       break;
 3438     case Op_VectorCastB2X:
 3439     case Op_VectorCastS2X:
 3440     case Op_VectorCastI2X:
 3441       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3442         return false;
 3443       }
 3444       break;
 3445     case Op_VectorCastL2X:
 3446       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3447         return false;
 3448       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3449         return false;
 3450       }
 3451       break;
 3452     case Op_VectorCastF2X: {
 3453         // As per JLS section 5.1.3 narrowing conversion to sub-word types
 3454         // happen after intermediate conversion to integer and special handling
 3455         // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
 3456         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3457         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3458           return false;
 3459         }
 3460       }
 3461       // fallthrough
 3462     case Op_VectorCastD2X:
 3463       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3464         return false;
 3465       }
 3466       break;
 3467     case Op_VectorCastF2HF:
 3468     case Op_VectorCastHF2F:
 3469       if (!VM_Version::supports_f16c() &&
 3470          ((!VM_Version::supports_evex() ||
 3471          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3472         return false;
 3473       }
 3474       break;
 3475     case Op_RoundVD:
 3476       if (!VM_Version::supports_avx512dq()) {
 3477         return false;
 3478       }
 3479       break;
 3480     case Op_MulReductionVI:
 3481       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3482         return false;
 3483       }
 3484       break;
 3485     case Op_LoadVectorGatherMasked:
 3486       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3487         return false;
 3488       }
 3489       if (is_subword_type(bt) &&
 3490          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3491           (size_in_bits < 64)                                      ||
 3492           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3493         return false;
 3494       }
 3495       break;
 3496     case Op_StoreVectorScatterMasked:
 3497     case Op_StoreVectorScatter:
 3498       if (is_subword_type(bt)) {
 3499         return false;
 3500       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3501         return false;
 3502       }
 3503       // fallthrough
 3504     case Op_LoadVectorGather:
 3505       if (!is_subword_type(bt) && size_in_bits == 64) {
 3506         return false;
 3507       }
 3508       if (is_subword_type(bt) && size_in_bits < 64) {
 3509         return false;
 3510       }
 3511       break;
 3512     case Op_SaturatingAddV:
 3513     case Op_SaturatingSubV:
 3514       if (UseAVX < 1) {
 3515         return false; // Implementation limitation
 3516       }
 3517       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3518         return false;
 3519       }
 3520       break;
 3521     case Op_SelectFromTwoVector:
 3522        if (size_in_bits < 128) {
 3523          return false;
 3524        }
 3525        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3526          return false;
 3527        }
 3528        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3529          return false;
 3530        }
 3531        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3532          return false;
 3533        }
 3534        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3535          return false;
 3536        }
 3537        break;
 3538     case Op_MaskAll:
 3539       if (!VM_Version::supports_evex()) {
 3540         return false;
 3541       }
 3542       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3543         return false;
 3544       }
 3545       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3546         return false;
 3547       }
 3548       break;
 3549     case Op_VectorMaskCmp:
 3550       if (vlen < 2 || size_in_bits < 32) {
 3551         return false;
 3552       }
 3553       break;
 3554     case Op_CompressM:
 3555       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3556         return false;
 3557       }
 3558       break;
 3559     case Op_CompressV:
 3560     case Op_ExpandV:
 3561       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3562         return false;
 3563       }
 3564       if (size_in_bits < 128 ) {
 3565         return false;
 3566       }
 3567     case Op_VectorLongToMask:
 3568       if (UseAVX < 1) {
 3569         return false;
 3570       }
 3571       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3572         return false;
 3573       }
 3574       break;
 3575     case Op_SignumVD:
 3576     case Op_SignumVF:
 3577       if (UseAVX < 1) {
 3578         return false;
 3579       }
 3580       break;
 3581     case Op_PopCountVI:
 3582     case Op_PopCountVL: {
 3583         if (!is_pop_count_instr_target(bt) &&
 3584             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3585           return false;
 3586         }
 3587       }
 3588       break;
 3589     case Op_ReverseV:
 3590     case Op_ReverseBytesV:
 3591       if (UseAVX < 2) {
 3592         return false;
 3593       }
 3594       break;
 3595     case Op_CountTrailingZerosV:
 3596     case Op_CountLeadingZerosV:
 3597       if (UseAVX < 2) {
 3598         return false;
 3599       }
 3600       break;
 3601   }
 3602   return true;  // Per default match rules are supported.
 3603 }
 3604 
 3605 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3606   // ADLC based match_rule_supported routine checks for the existence of pattern based
 3607   // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
 3608   // of their non-masked counterpart with mask edge being the differentiator.
 3609   // This routine does a strict check on the existence of masked operation patterns
 3610   // by returning a default false value for all the other opcodes apart from the
 3611   // ones whose masked instruction patterns are defined in this file.
 3612   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3613     return false;
 3614   }
 3615 
 3616   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3617   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3618     return false;
 3619   }
 3620   switch(opcode) {
 3621     // Unary masked operations
 3622     case Op_AbsVB:
 3623     case Op_AbsVS:
 3624       if(!VM_Version::supports_avx512bw()) {
 3625         return false;  // Implementation limitation
 3626       }
 3627     case Op_AbsVI:
 3628     case Op_AbsVL:
 3629       return true;
 3630 
 3631     // Ternary masked operations
 3632     case Op_FmaVF:
 3633     case Op_FmaVD:
 3634       return true;
 3635 
 3636     case Op_MacroLogicV:
 3637       if(bt != T_INT && bt != T_LONG) {
 3638         return false;
 3639       }
 3640       return true;
 3641 
 3642     // Binary masked operations
 3643     case Op_AddVB:
 3644     case Op_AddVS:
 3645     case Op_SubVB:
 3646     case Op_SubVS:
 3647     case Op_MulVS:
 3648     case Op_LShiftVS:
 3649     case Op_RShiftVS:
 3650     case Op_URShiftVS:
 3651       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3652       if (!VM_Version::supports_avx512bw()) {
 3653         return false;  // Implementation limitation
 3654       }
 3655       return true;
 3656 
 3657     case Op_MulVL:
 3658       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3659       if (!VM_Version::supports_avx512dq()) {
 3660         return false;  // Implementation limitation
 3661       }
 3662       return true;
 3663 
 3664     case Op_AndV:
 3665     case Op_OrV:
 3666     case Op_XorV:
 3667     case Op_RotateRightV:
 3668     case Op_RotateLeftV:
 3669       if (bt != T_INT && bt != T_LONG) {
 3670         return false; // Implementation limitation
 3671       }
 3672       return true;
 3673 
 3674     case Op_VectorLoadMask:
 3675       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3676       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3677         return false;
 3678       }
 3679       return true;
 3680 
 3681     case Op_AddVI:
 3682     case Op_AddVL:
 3683     case Op_AddVF:
 3684     case Op_AddVD:
 3685     case Op_SubVI:
 3686     case Op_SubVL:
 3687     case Op_SubVF:
 3688     case Op_SubVD:
 3689     case Op_MulVI:
 3690     case Op_MulVF:
 3691     case Op_MulVD:
 3692     case Op_DivVF:
 3693     case Op_DivVD:
 3694     case Op_SqrtVF:
 3695     case Op_SqrtVD:
 3696     case Op_LShiftVI:
 3697     case Op_LShiftVL:
 3698     case Op_RShiftVI:
 3699     case Op_RShiftVL:
 3700     case Op_URShiftVI:
 3701     case Op_URShiftVL:
 3702     case Op_LoadVectorMasked:
 3703     case Op_StoreVectorMasked:
 3704     case Op_LoadVectorGatherMasked:
 3705     case Op_StoreVectorScatterMasked:
 3706       return true;
 3707 
 3708     case Op_UMinV:
 3709     case Op_UMaxV:
 3710       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3711         return false;
 3712       } // fallthrough
 3713     case Op_MaxV:
 3714     case Op_MinV:
 3715       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3719         return false; // Implementation limitation
 3720       }
 3721       return true;
 3722     case Op_SaturatingAddV:
 3723     case Op_SaturatingSubV:
 3724       if (!is_subword_type(bt)) {
 3725         return false;
 3726       }
 3727       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       return true;
 3731 
 3732     case Op_VectorMaskCmp:
 3733       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3734         return false; // Implementation limitation
 3735       }
 3736       return true;
 3737 
 3738     case Op_VectorRearrange:
 3739       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3740         return false; // Implementation limitation
 3741       }
 3742       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3743         return false; // Implementation limitation
 3744       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3745         return false; // Implementation limitation
 3746       }
 3747       return true;
 3748 
 3749     // Binary Logical operations
 3750     case Op_AndVMask:
 3751     case Op_OrVMask:
 3752     case Op_XorVMask:
 3753       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3754         return false; // Implementation limitation
 3755       }
 3756       return true;
 3757 
 3758     case Op_PopCountVI:
 3759     case Op_PopCountVL:
 3760       if (!is_pop_count_instr_target(bt)) {
 3761         return false;
 3762       }
 3763       return true;
 3764 
 3765     case Op_MaskAll:
 3766       return true;
 3767 
 3768     case Op_CountLeadingZerosV:
 3769       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3770         return true;
 3771       }
 3772     default:
 3773       return false;
 3774   }
 3775 }
 3776 
// Always answers false on this platform; both arguments are ignored.
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}
 3780 
 3781 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3782 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3783   switch (elem_bt) {
 3784     case T_BYTE:  return false;
 3785     case T_SHORT: return !VM_Version::supports_avx512bw();
 3786     case T_INT:   return !VM_Version::supports_avx();
 3787     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3788     default:
 3789       ShouldNotReachHere();
 3790       return false;
 3791   }
 3792 }
 3793 
 3794 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3795   // Prefer predicate if the mask type is "TypePVectMask".
 3796   return vt->isa_pvectmask() != nullptr;
 3797 }
 3798 
 3799 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3800   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3801   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3802   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3803       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3804     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3805     return new legVecZOper();
 3806   }
 3807   if (legacy) {
 3808     switch (ideal_reg) {
 3809       case Op_VecS: return new legVecSOper();
 3810       case Op_VecD: return new legVecDOper();
 3811       case Op_VecX: return new legVecXOper();
 3812       case Op_VecY: return new legVecYOper();
 3813       case Op_VecZ: return new legVecZOper();
 3814     }
 3815   } else {
 3816     switch (ideal_reg) {
 3817       case Op_VecS: return new vecSOper();
 3818       case Op_VecD: return new vecDOper();
 3819       case Op_VecX: return new vecXOper();
 3820       case Op_VecY: return new vecYOper();
 3821       case Op_VecZ: return new vecZOper();
 3822     }
 3823   }
 3824   ShouldNotReachHere();
 3825   return nullptr;
 3826 }
 3827 
 3828 bool Matcher::is_reg2reg_move(MachNode* m) {
 3829   switch (m->rule()) {
 3830     case MoveVec2Leg_rule:
 3831     case MoveLeg2Vec_rule:
 3832     case MoveF2VL_rule:
 3833     case MoveF2LEG_rule:
 3834     case MoveVL2F_rule:
 3835     case MoveLEG2F_rule:
 3836     case MoveD2VL_rule:
 3837     case MoveD2LEG_rule:
 3838     case MoveVL2D_rule:
 3839     case MoveLEG2D_rule:
 3840       return true;
 3841     default:
 3842       return false;
 3843   }
 3844 }
 3845 
 3846 bool Matcher::is_generic_vector(MachOper* opnd) {
 3847   switch (opnd->opcode()) {
 3848     case VEC:
 3849     case LEGVEC:
 3850       return true;
 3851     default:
 3852       return false;
 3853   }
 3854 }
 3855 
 3856 //------------------------------------------------------------------------
 3857 
// Returns the address of the _VECTMASK_REG_mask register mask.
// NOTE(review): presumably this is the mask of the vector predicate (opmask)
// register class - confirm against the register class definitions.
const RegMask* Matcher::predicate_reg_mask(void) {
  return &_VECTMASK_REG_mask;
}
 3861 
 3862 // Max vector size in bytes. 0 if not supported.
 3863 int Matcher::vector_width_in_bytes(BasicType bt) {
 3864   assert(is_java_primitive(bt), "only primitive type vectors");
 3865   // SSE2 supports 128bit vectors for all types.
 3866   // AVX2 supports 256bit vectors for all types.
 3867   // AVX2/EVEX supports 512bit vectors for all types.
 3868   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3869   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3870   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3871     size = (UseAVX > 2) ? 64 : 32;
 3872   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3873     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3874   // Use flag to limit vector size.
 3875   size = MIN2(size,(int)MaxVectorSize);
 3876   // Minimum 2 values in vector (or 4 for bytes).
 3877   switch (bt) {
 3878   case T_DOUBLE:
 3879   case T_LONG:
 3880     if (size < 16) return 0;
 3881     break;
 3882   case T_FLOAT:
 3883   case T_INT:
 3884     if (size < 8) return 0;
 3885     break;
 3886   case T_BOOLEAN:
 3887     if (size < 4) return 0;
 3888     break;
 3889   case T_CHAR:
 3890     if (size < 4) return 0;
 3891     break;
 3892   case T_BYTE:
 3893     if (size < 4) return 0;
 3894     break;
 3895   case T_SHORT:
 3896     if (size < 4) return 0;
 3897     break;
 3898   default:
 3899     ShouldNotReachHere();
 3900   }
 3901   return size;
 3902 }
 3903 
 3904 // Limits on vector size (number of elements) loaded into vector.
 3905 int Matcher::max_vector_size(const BasicType bt) {
 3906   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3907 }
 3908 int Matcher::min_vector_size(const BasicType bt) {
 3909   int max_size = max_vector_size(bt);
 3910   // Min size which can be loaded into vector is 4 bytes.
 3911   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3912   // Support for calling svml double64 vectors
 3913   if (bt == T_DOUBLE) {
 3914     size = 1;
 3915   }
 3916   return MIN2(size,max_size);
 3917 }
 3918 
 3919 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3920   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3921   // by default on Cascade Lake
 3922   if (VM_Version::is_default_intel_cascade_lake()) {
 3923     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3924   }
 3925   return Matcher::max_vector_size(bt);
 3926 }
 3927 
// Always returns -1; the element type is ignored.
// NOTE(review): presumably -1 tells shared code that this platform has no
// scalable vector registers - confirm against the callers.
int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}
 3931 
 3932 // Vector ideal reg corresponding to specified size in bytes
 3933 uint Matcher::vector_ideal_reg(int size) {
 3934   assert(MaxVectorSize >= size, "");
 3935   switch(size) {
 3936     case  4: return Op_VecS;
 3937     case  8: return Op_VecD;
 3938     case 16: return Op_VecX;
 3939     case 32: return Op_VecY;
 3940     case 64: return Op_VecZ;
 3941   }
 3942   ShouldNotReachHere();
 3943   return 0;
 3944 }
 3945 
 3946 // Check for shift by small constant as well
 3947 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3948   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3949       shift->in(2)->get_int() <= 3 &&
 3950       // Are there other uses besides address expressions?
 3951       !matcher->is_visited(shift)) {
 3952     address_visited.set(shift->_idx); // Flag as address_visited
 3953     mstack.push(shift->in(2), Matcher::Visit);
 3954     Node *conv = shift->in(1);
 3955     // Allow Matcher to match the rule which bypass
 3956     // ConvI2L operation for an array index on LP64
 3957     // if the index value is positive.
 3958     if (conv->Opcode() == Op_ConvI2L &&
 3959         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3960         // Are there other uses besides address expressions?
 3961         !matcher->is_visited(conv)) {
 3962       address_visited.set(conv->_idx); // Flag as address_visited
 3963       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3964     } else {
 3965       mstack.push(conv, Matcher::Pre_Visit);
 3966     }
 3967     return true;
 3968   }
 3969   return false;
 3970 }
 3971 
 3972 // This function identifies sub-graphs in which a 'load' node is
 3973 // input to two different nodes, and such that it can be matched
 3974 // with BMI instructions like blsi, blsr, etc.
 3975 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3976 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3977 // refers to the same node.
 3978 //
 3979 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3980 // This is a temporary solution until we make DAGs expressible in ADL.
 3981 template<typename ConType>
 3982 class FusedPatternMatcher {
 3983   Node* _op1_node;
 3984   Node* _mop_node;
 3985   int _con_op;
 3986 
 3987   static int match_next(Node* n, int next_op, int next_op_idx) {
 3988     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3989       return -1;
 3990     }
 3991 
 3992     if (next_op_idx == -1) { // n is commutative, try rotations
 3993       if (n->in(1)->Opcode() == next_op) {
 3994         return 1;
 3995       } else if (n->in(2)->Opcode() == next_op) {
 3996         return 2;
 3997       }
 3998     } else {
 3999       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 4000       if (n->in(next_op_idx)->Opcode() == next_op) {
 4001         return next_op_idx;
 4002       }
 4003     }
 4004     return -1;
 4005   }
 4006 
 4007  public:
 4008   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4009     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4010 
 4011   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4012              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4013              typename ConType::NativeType con_value) {
 4014     if (_op1_node->Opcode() != op1) {
 4015       return false;
 4016     }
 4017     if (_mop_node->outcnt() > 2) {
 4018       return false;
 4019     }
 4020     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4021     if (op1_op2_idx == -1) {
 4022       return false;
 4023     }
 4024     // Memory operation must be the other edge
 4025     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4026 
 4027     // Check that the mop node is really what we want
 4028     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4029       Node* op2_node = _op1_node->in(op1_op2_idx);
 4030       if (op2_node->outcnt() > 1) {
 4031         return false;
 4032       }
 4033       assert(op2_node->Opcode() == op2, "Should be");
 4034       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4035       if (op2_con_idx == -1) {
 4036         return false;
 4037       }
 4038       // Memory operation must be the other edge
 4039       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4040       // Check that the memory operation is the same node
 4041       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4042         // Now check the constant
 4043         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4044         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4045           return true;
 4046         }
 4047       }
 4048     }
 4049     return false;
 4050   }
 4051 };
 4052 
 4053 static bool is_bmi_pattern(Node* n, Node* m) {
 4054   assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
 4055   if (n != nullptr && m != nullptr) {
 4056     if (m->Opcode() == Op_LoadI) {
 4057       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4058       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4059              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4060              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4061     } else if (m->Opcode() == Op_LoadL) {
 4062       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4063       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4064              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4065              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4066     }
 4067   }
 4068   return false;
 4069 }
 4070 
 4071 // Should the matcher clone input 'm' of node 'n'?
 4072 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4073   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4074   if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
 4075     mstack.push(m, Visit);
 4076     return true;
 4077   }
 4078   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4079     mstack.push(m, Visit);           // m = ShiftCntV
 4080     return true;
 4081   }
 4082   if (is_encode_and_store_pattern(n, m)) {
 4083     mstack.push(m, Visit);
 4084     return true;
 4085   }
 4086   return false;
 4087 }
 4088 
 4089 // Should the Matcher clone shifts on addressing modes, expecting them
 4090 // to be subsumed into complex addressing expressions or compute them
 4091 // into registers?
 4092 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4093   Node *off = m->in(AddPNode::Offset);
 4094   if (off->is_Con()) {
 4095     address_visited.test_set(m->_idx); // Flag as address_visited
 4096     Node *adr = m->in(AddPNode::Address);
 4097 
 4098     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4099     // AtomicAdd is not an addressing expression.
 4100     // Cheap to find it by looking for screwy base.
 4101     if (adr->is_AddP() &&
 4102         !adr->in(AddPNode::Base)->is_top() &&
 4103         !adr->in(AddPNode::Offset)->is_Con() &&
 4104         off->get_long() == (int) (off->get_long()) && // immL32
 4105         // Are there other uses besides address expressions?
 4106         !is_visited(adr)) {
 4107       address_visited.set(adr->_idx); // Flag as address_visited
 4108       Node *shift = adr->in(AddPNode::Offset);
 4109       if (!clone_shift(shift, this, mstack, address_visited)) {
 4110         mstack.push(shift, Pre_Visit);
 4111       }
 4112       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4113       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4114     } else {
 4115       mstack.push(adr, Pre_Visit);
 4116     }
 4117 
 4118     // Clone X+offset as it also folds into most addressing expressions
 4119     mstack.push(off, Visit);
 4120     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4121     return true;
 4122   } else if (clone_shift(off, this, mstack, address_visited)) {
 4123     address_visited.test_set(m->_idx); // Flag as address_visited
 4124     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4125     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4126     return true;
 4127   }
 4128   return false;
 4129 }
 4130 
 4131 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4132   switch (bt) {
 4133     case BoolTest::eq:
 4134       return Assembler::eq;
 4135     case BoolTest::ne:
 4136       return Assembler::neq;
 4137     case BoolTest::le:
 4138     case BoolTest::ule:
 4139       return Assembler::le;
 4140     case BoolTest::ge:
 4141     case BoolTest::uge:
 4142       return Assembler::nlt;
 4143     case BoolTest::lt:
 4144     case BoolTest::ult:
 4145       return Assembler::lt;
 4146     case BoolTest::gt:
 4147     case BoolTest::ugt:
 4148       return Assembler::nle;
 4149     default : ShouldNotReachHere(); return Assembler::_false;
 4150   }
 4151 }
 4152 
 4153 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4154   switch (bt) {
 4155   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4156   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4157   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4158   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4159   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4160   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4161   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4162   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4163   }
 4164 }
 4165 
 4166 // Helper methods for MachSpillCopyNode::implementation().
 4167 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4168                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4169   assert(ireg == Op_VecS || // 32bit vector
 4170          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4171           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4172          "no non-adjacent vector moves" );
 4173   if (masm) {
 4174     switch (ireg) {
 4175     case Op_VecS: // copy whole register
 4176     case Op_VecD:
 4177     case Op_VecX:
 4178       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4179         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4180       } else {
 4181         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4182      }
 4183       break;
 4184     case Op_VecY:
 4185       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4186         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4187       } else {
 4188         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4189      }
 4190       break;
 4191     case Op_VecZ:
 4192       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4193       break;
 4194     default:
 4195       ShouldNotReachHere();
 4196     }
 4197 #ifndef PRODUCT
 4198   } else {
 4199     switch (ireg) {
 4200     case Op_VecS:
 4201     case Op_VecD:
 4202     case Op_VecX:
 4203       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4204       break;
 4205     case Op_VecY:
 4206     case Op_VecZ:
 4207       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4208       break;
 4209     default:
 4210       ShouldNotReachHere();
 4211     }
 4212 #endif
 4213   }
 4214 }
 4215 
 4216 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4217                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4218   if (masm) {
 4219     if (is_load) {
 4220       switch (ireg) {
 4221       case Op_VecS:
 4222         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4223         break;
 4224       case Op_VecD:
 4225         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4226         break;
 4227       case Op_VecX:
 4228         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4229           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4230         } else {
 4231           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4232           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4233         }
 4234         break;
 4235       case Op_VecY:
 4236         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4237           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4238         } else {
 4239           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4240           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4241         }
 4242         break;
 4243       case Op_VecZ:
 4244         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4245         break;
 4246       default:
 4247         ShouldNotReachHere();
 4248       }
 4249     } else { // store
 4250       switch (ireg) {
 4251       case Op_VecS:
 4252         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4253         break;
 4254       case Op_VecD:
 4255         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4256         break;
 4257       case Op_VecX:
 4258         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4259           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4260         }
 4261         else {
 4262           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4263         }
 4264         break;
 4265       case Op_VecY:
 4266         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4267           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4268         }
 4269         else {
 4270           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4271         }
 4272         break;
 4273       case Op_VecZ:
 4274         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4275         break;
 4276       default:
 4277         ShouldNotReachHere();
 4278       }
 4279     }
 4280 #ifndef PRODUCT
 4281   } else {
 4282     if (is_load) {
 4283       switch (ireg) {
 4284       case Op_VecS:
 4285         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4286         break;
 4287       case Op_VecD:
 4288         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4289         break;
 4290        case Op_VecX:
 4291         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4292         break;
 4293       case Op_VecY:
 4294       case Op_VecZ:
 4295         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4296         break;
 4297       default:
 4298         ShouldNotReachHere();
 4299       }
 4300     } else { // store
 4301       switch (ireg) {
 4302       case Op_VecS:
 4303         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4304         break;
 4305       case Op_VecD:
 4306         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4307         break;
 4308        case Op_VecX:
 4309         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4310         break;
 4311       case Op_VecY:
 4312       case Op_VecZ:
 4313         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4314         break;
 4315       default:
 4316         ShouldNotReachHere();
 4317       }
 4318     }
 4319 #endif
 4320   }
 4321 }
 4322 
 4323 template <class T>
 4324 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4325   int size = type2aelembytes(bt) * len;
 4326   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4327   for (int i = 0; i < len; i++) {
 4328     int offset = i * type2aelembytes(bt);
 4329     switch (bt) {
 4330       case T_BYTE: val->at(i) = con; break;
 4331       case T_SHORT: {
 4332         jshort c = con;
 4333         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4334         break;
 4335       }
 4336       case T_INT: {
 4337         jint c = con;
 4338         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4339         break;
 4340       }
 4341       case T_LONG: {
 4342         jlong c = con;
 4343         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4344         break;
 4345       }
 4346       case T_FLOAT: {
 4347         jfloat c = con;
 4348         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4349         break;
 4350       }
 4351       case T_DOUBLE: {
 4352         jdouble c = con;
 4353         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4354         break;
 4355       }
 4356       default: assert(false, "%s", type2name(bt));
 4357     }
 4358   }
 4359   return val;
 4360 }
 4361 
 4362 static inline jlong high_bit_set(BasicType bt) {
 4363   switch (bt) {
 4364     case T_BYTE:  return 0x8080808080808080;
 4365     case T_SHORT: return 0x8000800080008000;
 4366     case T_INT:   return 0x8000000080000000;
 4367     case T_LONG:  return 0x8000000000000000;
 4368     default:
 4369       ShouldNotReachHere();
 4370       return 0;
 4371   }
 4372 }
 4373 
 4374 #ifndef PRODUCT
 4375   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4376     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4377   }
 4378 #endif
 4379 
 4380   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4381     __ nop(_count);
 4382   }
 4383 
 4384   uint MachNopNode::size(PhaseRegAlloc*) const {
 4385     return _count;
 4386   }
 4387 
 4388 #ifndef PRODUCT
 4389   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4390     st->print("# breakpoint");
 4391   }
 4392 #endif
 4393 
 4394   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4395     __ int3();
 4396   }
 4397 
 4398   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4399     return MachNode::size(ra_);
 4400   }
 4401 
 4402 %}
 4403 
 4404 //----------ENCODING BLOCK-----------------------------------------------------
 4405 // This block specifies the encoding classes used by the compiler to
 4406 // output byte streams.  Encoding classes are parameterized macros
 4407 // used by Machine Instruction Nodes in order to generate the bit
 4408 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
 4411 // COND_INTER.  REG_INTER causes an operand to generate a function
 4412 // which returns its register number when queried.  CONST_INTER causes
 4413 // an operand to generate a function which returns the value of the
 4414 // constant when queried.  MEMORY_INTER causes an operand to generate
 4415 // four functions which return the Base Register, the Index Register,
 4416 // the Scale Value, and the Offset Value of the operand when queried.
 4417 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
 4419 // associated with each basic boolean condition for a conditional
 4420 // instruction.
 4421 //
 4422 // Instructions specify two basic values for encoding.  Again, a
 4423 // function is available to check if the constant displacement is an
 4424 // oop. They use the ins_encode keyword to specify their encoding
 4425 // classes (which must be a sequence of enc_class names, and their
 4426 // parameters, specified in the encoding block), and they use the
 4427 // opcode keyword to specify, in order, their primary, secondary, and
 4428 // tertiary opcode.  Only the opcode sections which a particular
 4429 // instruction needs for encoding need to be specified.
 4430 encode %{
  // 32-bit signed divide/remainder sequence. The dividend is expected in
  // rax and the divisor in $div (an operand class that excludes rax/rdx);
  // the quotient ends up in rax and the remainder in rdx.
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:
    Label normal;
    Label done;

    // cmp    $0x80000000,%eax
    // Is the dividend min_int? If not, take the normal path.
    __ cmpl(as_Register(RAX_enc), 0x80000000);

    // jne    e <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    // Pre-set the special-case remainder (0); rax already holds min_int.
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpl($div$$Register, -1);

    // je     11 <done>
    // min_int / -1: skip the divide, the results are already in place.
    __ jccb(Assembler::equal, done);

    // <normal>
    // cltd
    __ bind(normal);
    __ cdql();

    // idivl
    // <done>
    __ idivl($div$$Register);
    __ bind(done);
  %}
 4487 
  // 64-bit signed divide/remainder sequence. The dividend is expected in
  // rax and the divisor in $div (an operand class that excludes rax/rdx);
  // the quotient ends up in rax and the remainder in rdx.
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:
    Label normal;
    Label done;

    // mov    $0x8000000000000000,%rdx
    // rdx is used as a scratch register to hold min_long for the compare;
    // it is overwritten again on both paths below.
    __ mov64(as_Register(RDX_enc), 0x8000000000000000);

    // cmp    %rdx,%rax
    __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

    // jne    17 <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    // Pre-set the special-case remainder (0); rax already holds min_long.
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpq($div$$Register, -1);

    // je     1c <done>
    // min_long / -1: skip the divide, the results are already in place.
    __ jccb(Assembler::equal, done);

    // <normal>
    // cqto
    __ bind(normal);
    __ cdqq();

    // idivq
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}
 4545 
 4546   enc_class clear_avx %{
 4547     DEBUG_ONLY(int off0 = __ offset());
 4548     if (generate_vzeroupper(Compile::current())) {
 4549       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
 4550       // Clear upper bits of YMM registers when current compiled code uses
 4551       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4552       __ vzeroupper();
 4553     }
 4554     DEBUG_ONLY(int off1 = __ offset());
 4555     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4556   %}
 4557 
 4558   enc_class Java_To_Runtime(method meth) %{
 4559     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4560     __ call(r10);
 4561     __ post_call_nop();
 4562   %}
 4563 
  // Emit a static (or optimized virtual) Java call. Three shapes are
  // produced, depending on what is known about the callee (_method):
  //   - no method:   a plain runtime call to the target address;
  //   - the _ensureMaterializedForStackWalk intrinsic: the call is elided
  //     and replaced by a 5-byte nop;
  //   - otherwise:   a relocated call plus a to-interpreter stub (either a
  //     shared one or one emitted here).
  // A post-call nop follows in every case, except when stub emission
  // fails and the encoding returns early.
  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ nop(5);
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      int method_index = resolved_method_index(masm);
      // The relocation kind records whether this is an optimized virtual
      // call or a true static call.
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      // Remember where the call instruction starts, both as an address
      // (for the stub) and as an offset (for the shared-stub request).
      address mark = __ pc();
      int call_offset = __ offset();
      __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, call_offset);
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          // No stub could be emitted: record the failure and stop
          // emitting this call (no post-call nop is emitted).
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    __ post_call_nop();
  %}
 4599 
 4600   enc_class Java_Dynamic_Call(method meth) %{
 4601     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4602     __ post_call_nop();
 4603   %}
 4604 
  // Code emitted after a call returns. Two independent parts:
  //   1. With VerifyStackAtCalls, check that the cookie 0xbadb100d is still
  //      at its expected stack slot and trap (int3) if it is not.
  //   2. For calls that return an inline type as fields, initialize the
  //      null-marker projection from rax and, if the return value is used,
  //      clear rax when its lowest bit is set.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initializes it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          // If the projection lives on the stack, compute the marker in
          // rscratch1 and store it to the stack slot afterwards.
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          // toReg = (rax != 0) ? 1 : 0
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          // Only one projection carries the null marker; stop searching.
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        // (rax & 1) - 1 is all ones when the low bit is clear (rax is kept)
        // and zero when it is set (rax is cleared).
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
 4650 
 4651 %}
 4652 
 4653 //----------FRAME--------------------------------------------------------------
 4654 // Definition of frame structure and management information.
 4655 //
 4656 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4657 //                             |   (to get allocators register number
 4658 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4659 //  r   CALLER     |        |
 4660 //  o     |        +--------+      pad to even-align allocators stack-slot
 4661 //  w     V        |  pad0  |        numbers; owned by CALLER
 4662 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4663 //  h     ^        |   in   |  5
 4664 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4665 //  |     |        |        |  3
 4666 //  |     |        +--------+
 4667 //  V     |        | old out|      Empty on Intel, window on Sparc
 4668 //        |    old |preserve|      Must be even aligned.
 4669 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4670 //        |        |   in   |  3   area for Intel ret address
 4671 //     Owned by    |preserve|      Empty on Sparc.
 4672 //       SELF      +--------+
 4673 //        |        |  pad2  |  2   pad to align old SP
 4674 //        |        +--------+  1
 4675 //        |        | locks  |  0
 4676 //        |        +--------+----> OptoReg::stack0(), even aligned
 4677 //        |        |  pad1  | 11   pad to align new SP
 4678 //        |        +--------+
 4679 //        |        |        | 10
 4680 //        |        | spills |  9   spills
 4681 //        V        |        |  8   (pad0 slot for callee)
 4682 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4683 //        ^        |  out   |  7
 4684 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4685 //     Owned by    +--------+
 4686 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4687 //        |    new |preserve|      Must be even-aligned.
 4688 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4689 //        |        |        |
 4690 //
 4691 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4692 //         known from SELF's arguments and the Java calling convention.
 4693 //         Region 6-7 is determined per call site.
 4694 // Note 2: If the calling convention leaves holes in the incoming argument
 4695 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4696 //         are owned by the CALLEE.  Holes should not be necessary in the
 4697 //         incoming area, as the Java calling convention is completely under
 4698 //         the control of the AD file.  Doubles can be sorted and packed to
 4699 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4700 //         varargs C calling conventions.
 4701 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4702 //         even aligned with pad0 as needed.
 4703 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4704 //         region 6-11 is even aligned; it may be padded out more so that
 4705 //         the region from SP to FP meets the minimum stack alignment.
 4706 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4707 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4708 //         SP meets the minimum alignment.
 4709 
// Frame description: registers and stack-slot parameters the compiler uses
// to lay out compiled frames, plus the location of the return address and
// of Java return values.
frame
%{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C for now.
  // Both tables below are indexed by ideal_reg; lo gives the first register
  // of the returned pair and hi the second (OptoReg::Bad when there is no
  // second half). The two leading 0 entries are placeholders for ideal
  // register numbers that have no return location here.
  return_value
  %{
    // NOTE(review): the tables carry an Op_RegN entry, yet the lower bound
    // checked here is Op_RegI - confirm whether narrow values are meant to
    // be rejected.
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    // Guards against a new machine leaf type being added without a table entry.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
 4775 
 4776 //----------ATTRIBUTES---------------------------------------------------------
 4777 //----------Operand Attributes-------------------------------------------------
// Each declaration names an attribute and gives a value in parentheses;
// presumably this is the default applied when an operand or instruction
// does not set the attribute itself - confirm against the ADLC.
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction

// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4797 
 4798 //----------OPERANDS-----------------------------------------------------------
 4799 // Operand definitions must precede instruction definitions for correct parsing
 4800 // in the ADLC because operands constitute user defined types which are used in
 4801 // instruction definitions.
 4802 
 4803 //----------Simple Operands----------------------------------------------------
 4804 // Immediate Operands
// Integer Immediate
// Matches every ConI node; no predicate restricts the value.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4814 
// Constant for test vs zero
// Matches a ConI node only when its value is 0.
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4825 
// Constant for increment
// Matches a ConI node only when its value is 1.
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4836 
// Constant for decrement
// Matches a ConI node only when its value is -1.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4847 
// Integer constant 2: matches a ConI node only when its value is 2.
operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4857 
// Integer constant 4: matches a ConI node only when its value is 4.
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4867 
// Integer constant 8: matches a ConI node only when its value is 8.
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4877 
// Valid scale values for addressing modes
// Matches a ConI node whose value is in the range 0..3 (inclusive).
// No op_cost is given for this operand.
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
 4887 
// Unsigned 7-bit integer immediate: matches a ConI node whose value is in
// the range 0..0x7F (inclusive).
operand immU7()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4897 
// Signed 8-bit integer immediate: matches a ConI node whose value is in
// the range -0x80..0x7F (inclusive).
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4907 
// Unsigned 8-bit integer immediate: matches a ConI node whose value is in
// the range 0..255 (inclusive).
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4917 
// Signed 16-bit integer immediate: matches a ConI node whose value is in
// the range -32768..32767 (inclusive).
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4927 
// Int Immediate non-negative
// Matches a ConI node whose value is >= 0.
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4938 
// Pointer Immediate
// Matches every ConP node; no predicate restricts the value.
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4948 
// Null Pointer Immediate
// Matches a ConP node only when its pointer value is 0.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4959 
// Narrow Pointer Immediate
// Matches every ConN node; no predicate restricts the value.
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4968 
// Narrow Klass Immediate
// Matches every ConNKlass node; no predicate restricts the value.
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4976 
// Narrow Null Pointer Immediate
// Matches a ConN node only when its narrow constant is 0.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4986 
// Pointer Immediate that fits in 31 bits
// Matches a ConP node that needs no relocation (relocInfo::none) and whose
// value has no bits set at or above bit 31.
operand immP31()
%{
  predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4997 
 4998 
// Long Immediate
// Matches every ConL node; no predicate restricts the value.
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
 5008 
// Long Immediate 8-bit
// Matches a ConL node whose value is in the signed 8-bit range
// -0x80..0x7F (inclusive).
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 5019 
// Long Immediate 32-bit unsigned
// Matches a ConL node whose value is unchanged by a round trip through
// unsigned int, i.e. it is representable as an unsigned 32-bit number.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 5030 
// Long Immediate 32-bit signed
// Matches a ConL node whose value is unchanged by a round trip through
// int, i.e. it is representable as a signed 32-bit number.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
 5041 
 5042 operand immL_Pow2()
 5043 %{
 5044   predicate(is_power_of_2((julong)n->get_long()));  // long constant that is a power of 2 when viewed as unsigned (julong)
 5045   match(ConL);
 5046 
 5047   op_cost(15);
 5048   format %{ %}
 5049   interface(CONST_INTER);
 5050 %}
 5051 
 5052 operand immL_NotPow2()
 5053 %{
 5054   predicate(is_power_of_2((julong)~n->get_long()));  // long constant whose bitwise complement is a power of 2
 5055   match(ConL);
 5056 
 5057   op_cost(15);
 5058   format %{ %}
 5059   interface(CONST_INTER);
 5060 %}
 5061 
 5062 // Long Immediate zero
 5063 operand immL0()
 5064 %{
 5065   predicate(n->get_long() == 0L);  // accept only the long constant 0
 5066   match(ConL);
 5067 
 5068   op_cost(10);
 5069   format %{ %}
 5070   interface(CONST_INTER);
 5071 %}
 5072 
 5073 // Constant for increment: the long constant 1
 5074 operand immL1()
 5075 %{
 5076   predicate(n->get_long() == 1);
 5077   match(ConL);  // note: no op_cost is declared for this operand
 5078 
 5079   format %{ %}
 5080   interface(CONST_INTER);
 5081 %}
 5082 
 5083 // Constant for decrement: the long constant -1
 5084 operand immL_M1()
 5085 %{
 5086   predicate(n->get_long() == -1);
 5087   match(ConL);  // note: no op_cost is declared for this operand
 5088 
 5089   format %{ %}
 5090   interface(CONST_INTER);
 5091 %}
 5092 
 5093 // Long Immediate: low 32-bit mask (exactly 0x00000000FFFFFFFF)
 5094 operand immL_32bits()
 5095 %{
 5096   predicate(n->get_long() == 0xFFFFFFFFL);  // only this single mask value matches
 5097   match(ConL);
 5098   op_cost(20);
 5099 
 5100   format %{ %}
 5101   interface(CONST_INTER);
 5102 %}
 5103 
 5104 // Int Immediate: 2^n-1, positive
 5105 operand immI_Pow2M1()
 5106 %{
 5107   predicate((n->get_int() > 0)
 5108             && is_power_of_2((juint)n->get_int() + 1));  // +1 is done on juint, so it is computed in unsigned arithmetic
 5109   match(ConI);
 5110 
 5111   op_cost(20);
 5112   format %{ %}
 5113   interface(CONST_INTER);
 5114 %}
 5115 
 5116 // Float Immediate zero
 5117 operand immF0()
 5118 %{
 5119   predicate(jint_cast(n->getf()) == 0);  // compares the jint_cast of the value, presumably its raw bits, so -0.0f would not match (confirm)
 5120   match(ConF);
 5121 
 5122   op_cost(5);
 5123   format %{ %}
 5124   interface(CONST_INTER);
 5125 %}
 5126 
 5127 // Float Immediate (any ConF constant)
 5128 operand immF()
 5129 %{
 5130   match(ConF);  // no predicate: every float constant is accepted
 5131 
 5132   op_cost(15);  // costlier than the zero-only immF0 (op_cost 5)
 5133   format %{ %}
 5134   interface(CONST_INTER);
 5135 %}
 5136 
 5137 // Half Float Immediate (any ConH constant)
 5138 operand immH()
 5139 %{
 5140   match(ConH);  // no predicate: every ConH constant is accepted
 5141 
 5142   op_cost(15);
 5143   format %{ %}
 5144   interface(CONST_INTER);
 5145 %}
 5146 
 5147 // Double Immediate zero
 5148 operand immD0()
 5149 %{
 5150   predicate(jlong_cast(n->getd()) == 0);  // compares the jlong_cast of the value, presumably its raw bits, so -0.0 would not match (confirm)
 5151   match(ConD);
 5152 
 5153   op_cost(5);
 5154   format %{ %}
 5155   interface(CONST_INTER);
 5156 %}
 5157 
 5158 // Double Immediate (any ConD constant)
 5159 operand immD()
 5160 %{
 5161   match(ConD);  // no predicate: every double constant is accepted
 5162 
 5163   op_cost(15);  // costlier than the zero-only immD0 (op_cost 5)
 5164   format %{ %}
 5165   interface(CONST_INTER);
 5166 %}
 5167 
 5168 // Immediates for special shifts (sign extend)
 5169 
 5170 // Int constant 16
 5171 operand immI_16()
 5172 %{
 5173   predicate(n->get_int() == 16);  // accept only the int constant 16
 5174   match(ConI);
 5175 
 5176   format %{ %}
 5177   interface(CONST_INTER);
 5178 %}
 5179 
 5180 operand immI_24()
 5181 %{
 5182   predicate(n->get_int() == 24);  // Int constant 24: accept only this value
 5183   match(ConI);
 5184 
 5185   format %{ %}
 5186   interface(CONST_INTER);
 5187 %}
 5188 
 5189 // Constant for byte-wide masking: the int constant 255 (0xFF)
 5190 operand immI_255()
 5191 %{
 5192   predicate(n->get_int() == 255);
 5193   match(ConI);
 5194 
 5195   format %{ %}
 5196   interface(CONST_INTER);
 5197 %}
 5198 
 5199 // Constant for short-wide masking: the int constant 65535 (0xFFFF)
 5200 operand immI_65535()
 5201 %{
 5202   predicate(n->get_int() == 65535);
 5203   match(ConI);
 5204 
 5205   format %{ %}
 5206   interface(CONST_INTER);
 5207 %}
 5208 
 5209 // Constant for byte-wide masking: the long constant 255 (0xFF)
 5210 operand immL_255()
 5211 %{
 5212   predicate(n->get_long() == 255);
 5213   match(ConL);
 5214 
 5215   format %{ %}
 5216   interface(CONST_INTER);
 5217 %}
 5218 
 5219 // Constant for short-wide masking: the long constant 65535 (0xFFFF)
 5220 operand immL_65535()
 5221 %{
 5222   predicate(n->get_long() == 65535);
 5223   match(ConL);
 5224 
 5225   format %{ %}
 5226   interface(CONST_INTER);
 5227 %}
 5228 
 5229 // AOT Runtime Constants Address: a ConP accepted by AOTRuntimeConstants::contains()
 5230 operand immAOTRuntimeConstantsAddress()
 5231 %{
 5232   // Check if the address is in the range of AOT Runtime Constants
 5233   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5234   match(ConP);
 5235 
 5236   op_cost(0);  // lowest cost of the ConP immediates shown here (immP0 and immP31 use 5)
 5237   format %{ %}
 5238   interface(CONST_INTER);
 5239 %}
 5240 
 5241 operand kReg()
 5242 %{
 5243   constraint(ALLOC_IN_RC(vectmask_reg));  // vector-mask register operand, allocated from vectmask_reg
 5244   match(RegVectMask);
 5245   format %{%}
 5246   interface(REG_INTER);
 5247 %}
 5248 
 5249 // Register Operands
 5250 // Integer Register: general int operand allocated from int_reg
 5251 operand rRegI()
 5252 %{
 5253   constraint(ALLOC_IN_RC(int_reg));
 5254   match(RegI);
 5255 
 5256   match(rax_RegI);  // the fixed-register int operands are also accepted where an rRegI input is expected
 5257   match(rbx_RegI);
 5258   match(rcx_RegI);
 5259   match(rdx_RegI);
 5260   match(rdi_RegI);
 5261 
 5262   format %{ %}
 5263   interface(REG_INTER);
 5264 %}
 5265 
 5266 // Special Registers: int operand pinned to the int_rax_reg class
 5267 operand rax_RegI()
 5268 %{
 5269   constraint(ALLOC_IN_RC(int_rax_reg));
 5270   match(RegI);
 5271   match(rRegI);  // an rRegI value may be supplied as input; the constraint above still fixes allocation
 5272 
 5273   format %{ "RAX" %}
 5274   interface(REG_INTER);
 5275 %}
 5276 
 5277 // Special Registers: int operand pinned to the int_rbx_reg class
 5278 operand rbx_RegI()
 5279 %{
 5280   constraint(ALLOC_IN_RC(int_rbx_reg));
 5281   match(RegI);
 5282   match(rRegI);  // an rRegI value may be supplied as input
 5283 
 5284   format %{ "RBX" %}
 5285   interface(REG_INTER);
 5286 %}
 5287 
 5288 operand rcx_RegI()
 5289 %{
 5290   constraint(ALLOC_IN_RC(int_rcx_reg));  // int operand pinned to the int_rcx_reg class
 5291   match(RegI);
 5292   match(rRegI);  // an rRegI value may be supplied as input
 5293 
 5294   format %{ "RCX" %}
 5295   interface(REG_INTER);
 5296 %}
 5297 
 5298 operand rdx_RegI()
 5299 %{
 5300   constraint(ALLOC_IN_RC(int_rdx_reg));  // int operand pinned to the int_rdx_reg class
 5301   match(RegI);
 5302   match(rRegI);  // an rRegI value may be supplied as input
 5303 
 5304   format %{ "RDX" %}
 5305   interface(REG_INTER);
 5306 %}
 5307 
 5308 operand rdi_RegI()
 5309 %{
 5310   constraint(ALLOC_IN_RC(int_rdi_reg));  // int operand pinned to the int_rdi_reg class
 5311   match(RegI);
 5312   match(rRegI);  // an rRegI value may be supplied as input
 5313 
 5314   format %{ "RDI" %}
 5315   interface(REG_INTER);
 5316 %}
 5317 
 5318 operand no_rax_rdx_RegI()
 5319 %{
 5320   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));  // int operand allocated from int_no_rax_rdx_reg
 5321   match(RegI);
 5322   match(rbx_RegI);  // rax_RegI and rdx_RegI are not listed, consistent with the operand's name
 5323   match(rcx_RegI);
 5324   match(rdi_RegI);
 5325 
 5326   format %{ %}
 5327   interface(REG_INTER);
 5328 %}
 5329 
 5330 operand no_rbp_r13_RegI()
 5331 %{
 5332   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));  // int operand allocated from int_no_rbp_r13_reg
 5333   match(RegI);
 5334   match(rRegI);  // accepts the general int operand and every fixed-register int operand as input
 5335   match(rax_RegI);
 5336   match(rbx_RegI);
 5337   match(rcx_RegI);
 5338   match(rdx_RegI);
 5339   match(rdi_RegI);
 5340 
 5341   format %{ %}
 5342   interface(REG_INTER);
 5343 %}
 5344 
 5345 // Pointer Register: widest pointer operand, allocated from any_reg
 5346 operand any_RegP()
 5347 %{
 5348   constraint(ALLOC_IN_RC(any_reg));
 5349   match(RegP);
 5350   match(rax_RegP);  // every fixed-register pointer operand, plus rRegP, is accepted as input
 5351   match(rbx_RegP);
 5352   match(rdi_RegP);
 5353   match(rsi_RegP);
 5354   match(rbp_RegP);
 5355   match(r15_RegP);
 5356   match(rRegP);
 5357 
 5358   format %{ %}
 5359   interface(REG_INTER);
 5360 %}
 5361 
 5362 operand rRegP()
 5363 %{
 5364   constraint(ALLOC_IN_RC(ptr_reg));  // general pointer operand; allocation is limited to ptr_reg
 5365   match(RegP);
 5366   match(rax_RegP);
 5367   match(rbx_RegP);
 5368   match(rdi_RegP);
 5369   match(rsi_RegP);
 5370   match(rbp_RegP);  // See Q&A below about
 5371   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5372 
 5373   format %{ %}
 5374   interface(REG_INTER);
 5375 %}
 5376 
 5377 operand rRegN() %{
 5378   constraint(ALLOC_IN_RC(int_reg));  // narrow (RegN) values use the same int_reg class as rRegI
 5379   match(RegN);
 5380 
 5381   format %{ %}
 5382   interface(REG_INTER);
 5383 %}
 5384 
 5385 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5386 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5387 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 5388 // The output of an instruction is controlled by the allocator, which respects
 5389 // register class masks, not match rules.  Unless an instruction mentions
 5390 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 5391 // by the allocator as an input.
 5392 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 5393 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5394 // result, RBP is not included in the output of the instruction either.
 5395 
 5396 // This operand is not allowed to use RBP even if
 5397 // RBP is not used to hold the frame pointer.
 5398 operand no_rbp_RegP()
 5399 %{
 5400   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5401   match(RegP);
 5402   match(rbx_RegP);  // only rbx, rsi and rdi fixed pointer operands are listed as accepted inputs
 5403   match(rsi_RegP);
 5404   match(rdi_RegP);
 5405 
 5406   format %{ %}
 5407   interface(REG_INTER);
 5408 %}
 5409 
 5410 // Special Registers
 5411 // Return a pointer value
 5412 operand rax_RegP()
 5413 %{
 5414   constraint(ALLOC_IN_RC(ptr_rax_reg));  // pointer operand pinned to the ptr_rax_reg class
 5415   match(RegP);
 5416   match(rRegP);  // an rRegP value may be supplied as input
 5417 
 5418   format %{ %}
 5419   interface(REG_INTER);
 5420 %}
 5421 
 5422 // Special Registers
 5423 // Return a compressed pointer value
 5424 operand rax_RegN()
 5425 %{
 5426   constraint(ALLOC_IN_RC(int_rax_reg));  // same int_rax_reg class that rax_RegI uses
 5427   match(RegN);
 5428   match(rRegN);  // an rRegN value may be supplied as input
 5429 
 5430   format %{ %}
 5431   interface(REG_INTER);
 5432 %}
 5433 
 5434 // Used in AtomicAdd
 5435 operand rbx_RegP()
 5436 %{
 5437   constraint(ALLOC_IN_RC(ptr_rbx_reg));  // pointer operand pinned to the ptr_rbx_reg class
 5438   match(RegP);
 5439   match(rRegP);  // an rRegP value may be supplied as input
 5440 
 5441   format %{ %}
 5442   interface(REG_INTER);
 5443 %}
 5444 
 5445 operand rsi_RegP()
 5446 %{
 5447   constraint(ALLOC_IN_RC(ptr_rsi_reg));  // pointer operand pinned to the ptr_rsi_reg class
 5448   match(RegP);
 5449   match(rRegP);  // an rRegP value may be supplied as input
 5450 
 5451   format %{ %}
 5452   interface(REG_INTER);
 5453 %}
 5454 
 5455 operand rbp_RegP()
 5456 %{
 5457   constraint(ALLOC_IN_RC(ptr_rbp_reg));  // pointer operand pinned to the ptr_rbp_reg class
 5458   match(RegP);
 5459   match(rRegP);  // an rRegP value may be supplied as input
 5460 
 5461   format %{ %}
 5462   interface(REG_INTER);
 5463 %}
 5464 
 5465 // Used in rep stosq
 5466 operand rdi_RegP()
 5467 %{
 5468   constraint(ALLOC_IN_RC(ptr_rdi_reg));  // pointer operand pinned to the ptr_rdi_reg class
 5469   match(RegP);
 5470   match(rRegP);  // an rRegP value may be supplied as input
 5471 
 5472   format %{ %}
 5473   interface(REG_INTER);
 5474 %}
 5475 
 5476 operand r15_RegP()
 5477 %{
 5478   constraint(ALLOC_IN_RC(ptr_r15_reg));  // pointer operand pinned to ptr_r15_reg (the Q&A above calls r15 the read-only TLS register)
 5479   match(RegP);
 5480   match(rRegP);  // an rRegP value may be supplied as input
 5481 
 5482   format %{ %}
 5483   interface(REG_INTER);
 5484 %}
 5485 
 5486 operand rRegL()
 5487 %{
 5488   constraint(ALLOC_IN_RC(long_reg));  // general long operand allocated from long_reg
 5489   match(RegL);
 5490   match(rax_RegL);  // only the rax and rdx fixed long operands are listed here (rcx_RegL / r11_RegL are not)
 5491   match(rdx_RegL);
 5492 
 5493   format %{ %}
 5494   interface(REG_INTER);
 5495 %}
 5496 
 5497 // Special Registers: long operand allocated from long_no_rax_rdx_reg
 5498 operand no_rax_rdx_RegL()
 5499 %{
 5500   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5501   match(RegL);
 5502   match(rRegL);  // an rRegL value may be supplied as input
 5503 
 5504   format %{ %}
 5505   interface(REG_INTER);
 5506 %}
 5507 
 5508 operand rax_RegL()
 5509 %{
 5510   constraint(ALLOC_IN_RC(long_rax_reg));  // long operand pinned to the long_rax_reg class
 5511   match(RegL);
 5512   match(rRegL);  // an rRegL value may be supplied as input
 5513 
 5514   format %{ "RAX" %}
 5515   interface(REG_INTER);
 5516 %}
 5517 
 5518 operand rcx_RegL()
 5519 %{
 5520   constraint(ALLOC_IN_RC(long_rcx_reg));  // long operand pinned to the long_rcx_reg class
 5521   match(RegL);
 5522   match(rRegL);  // an rRegL value may be supplied as input
 5523 
 5524   format %{ %}
 5525   interface(REG_INTER);
 5526 %}
 5527 
 5528 operand rdx_RegL()
 5529 %{
 5530   constraint(ALLOC_IN_RC(long_rdx_reg));  // long operand pinned to the long_rdx_reg class
 5531   match(RegL);
 5532   match(rRegL);  // an rRegL value may be supplied as input
 5533 
 5534   format %{ %}
 5535   interface(REG_INTER);
 5536 %}
 5537 
 5538 operand r11_RegL()
 5539 %{
 5540   constraint(ALLOC_IN_RC(long_r11_reg));  // long operand pinned to the long_r11_reg class
 5541   match(RegL);
 5542   match(rRegL);  // an rRegL value may be supplied as input
 5543 
 5544   format %{ %}
 5545   interface(REG_INTER);
 5546 %}
 5547 
 5548 operand no_rbp_r13_RegL()
 5549 %{
 5550   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));  // long operand allocated from long_no_rbp_r13_reg
 5551   match(RegL);
 5552   match(rRegL);  // accepts the general long operand plus the rax/rcx/rdx fixed long operands as input
 5553   match(rax_RegL);
 5554   match(rcx_RegL);
 5555   match(rdx_RegL);
 5556 
 5557   format %{ %}
 5558   interface(REG_INTER);
 5559 %}
 5560 
 5561 // Flags register, used as output of compare instructions
 5562 operand rFlagsReg()
 5563 %{
 5564   constraint(ALLOC_IN_RC(int_flags));  // all flags operands in this section share the int_flags class
 5565   match(RegFlags);
 5566 
 5567   format %{ "RFLAGS" %}
 5568   interface(REG_INTER);
 5569 %}
 5570 
 5571 // Flags register, used as output of FLOATING POINT compare instructions
 5572 // NOTE(review): the cmpOpU comment further down pairs unsigned compares with FP, so this is presumably used for unsigned integer compares too - confirm.
 5573 operand rFlagsRegU()
 5574   constraint(ALLOC_IN_RC(int_flags));
 5575   match(RegFlags);
 5576 
 5577   format %{ "RFLAGS_U" %}
 5578   interface(REG_INTER);
 5579 %}
 5580 
 5581 operand rFlagsRegUCF() %{
 5582   constraint(ALLOC_IN_RC(int_flags));
 5583   match(RegFlags);
 5584   predicate(!UseAPX || !VM_Version::supports_avx10_2());  // logical complement of the rFlagsRegUCFE predicate, so exactly one of the two is enabled
 5585 
 5586   format %{ "RFLAGS_U_CF" %}
 5587   interface(REG_INTER);
 5588 %}
 5589 
 5590 operand rFlagsRegUCFE() %{
 5591   constraint(ALLOC_IN_RC(int_flags));
 5592   match(RegFlags);
 5593   predicate(UseAPX && VM_Version::supports_avx10_2());  // enabled only with UseAPX on hardware reporting AVX10.2 support
 5594 
 5595   format %{ "RFLAGS_U_CFE" %}
 5596   interface(REG_INTER);
 5597 %}
 5598 
 5599 // Float register operand (general): allocated from float_reg
 5600 operand regF() %{
 5601    constraint(ALLOC_IN_RC(float_reg));
 5602    match(RegF);
 5603 
 5604    format %{ %}
 5605    interface(REG_INTER);
 5606 %}
 5607 
 5608 // Float register operand restricted to the float_reg_legacy class
 5609 operand legRegF() %{
 5610    constraint(ALLOC_IN_RC(float_reg_legacy));
 5611    match(RegF);
 5612 
 5613    format %{ %}
 5614    interface(REG_INTER);
 5615 %}
 5616 
 5617 // Float register operand restricted to the float_reg_vl class
 5618 operand vlRegF() %{
 5619    constraint(ALLOC_IN_RC(float_reg_vl));
 5620    match(RegF);
 5621 
 5622    format %{ %}
 5623    interface(REG_INTER);
 5624 %}
 5625 
 5626 // Double register operand (general): allocated from double_reg
 5627 operand regD() %{
 5628    constraint(ALLOC_IN_RC(double_reg));
 5629    match(RegD);
 5630 
 5631    format %{ %}
 5632    interface(REG_INTER);
 5633 %}
 5634 
 5635 // Double register operand restricted to the double_reg_legacy class
 5636 operand legRegD() %{
 5637    constraint(ALLOC_IN_RC(double_reg_legacy));
 5638    match(RegD);
 5639 
 5640    format %{ %}
 5641    interface(REG_INTER);
 5642 %}
 5643 
 5644 // Double register operand restricted to the double_reg_vl class
 5645 operand vlRegD() %{
 5646    constraint(ALLOC_IN_RC(double_reg_vl));
 5647    match(RegD);
 5648 
 5649    format %{ %}
 5650    interface(REG_INTER);
 5651 %}
 5652 
 5653 //----------Memory Operands----------------------------------------------------
 5654 // Direct Memory Operand
 5655 // operand direct(immP addr)
 5656 // %{
 5657 //   match(addr);
 5658 
 5659 //   format %{ "[$addr]" %}
 5660 //   interface(MEMORY_INTER) %{
 5661 //     base(0xFFFFFFFF);
 5662 //     index(0x4);
 5663 //     scale(0x0);
 5664 //     disp($addr);
 5665 //   %}
 5666 // %}
 5667 
 5668 // Indirect Memory Operand: address is the base register alone
 5669 operand indirect(any_RegP reg)
 5670 %{
 5671   constraint(ALLOC_IN_RC(ptr_reg));
 5672   match(reg);
 5673 
 5674   format %{ "[$reg]" %}
 5675   interface(MEMORY_INTER) %{
 5676     base($reg);
 5677     index(0x4);  // no index (same 0x4 encoding the stack-slot operands annotate as "No Index")
 5678     scale(0x0);  // no scale
 5679     disp(0x0);   // no displacement
 5680   %}
 5681 %}
 5682 
 5683 // Indirect Memory Plus Short Offset Operand
 5684 operand indOffset8(any_RegP reg, immL8 off)
 5685 %{
 5686   constraint(ALLOC_IN_RC(ptr_reg));
 5687   match(AddP reg off);  // off is an immL8, i.e. a long constant in [-128, 127]
 5688 
 5689   format %{ "[$reg + $off (8-bit)]" %}
 5690   interface(MEMORY_INTER) %{
 5691     base($reg);
 5692     index(0x4);  // no index
 5693     scale(0x0);
 5694     disp($off);
 5695   %}
 5696 %}
 5697 
 5698 // Indirect Memory Plus Long Offset Operand
 5699 operand indOffset32(any_RegP reg, immL32 off)
 5700 %{
 5701   constraint(ALLOC_IN_RC(ptr_reg));
 5702   match(AddP reg off);  // off is an immL32, i.e. a long constant that fits in a signed 32-bit int
 5703 
 5704   format %{ "[$reg + $off (32-bit)]" %}
 5705   interface(MEMORY_INTER) %{
 5706     base($reg);
 5707     index(0x4);  // no index
 5708     scale(0x0);
 5709     disp($off);
 5710   %}
 5711 %}
 5712 
 5713 // Indirect Memory Plus Index Register Plus Offset Operand
 5714 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5715 %{
 5716   constraint(ALLOC_IN_RC(ptr_reg));
 5717   match(AddP (AddP reg lreg) off);  // inner AddP adds the index register, outer AddP adds the constant offset
 5718 
 5719   op_cost(10);
 5720   format %{"[$reg + $off + $lreg]" %}
 5721   interface(MEMORY_INTER) %{
 5722     base($reg);
 5723     index($lreg);
 5724     scale(0x0);  // index is used unscaled
 5725     disp($off);
 5726   %}
 5727 %}
 5728 
 5729 // Indirect Memory Plus Index Register Operand (no offset)
 5730 operand indIndex(any_RegP reg, rRegL lreg)
 5731 %{
 5732   constraint(ALLOC_IN_RC(ptr_reg));
 5733   match(AddP reg lreg);
 5734 
 5735   op_cost(10);
 5736   format %{"[$reg + $lreg]" %}
 5737   interface(MEMORY_INTER) %{
 5738     base($reg);
 5739     index($lreg);
 5740     scale(0x0);  // index is used unscaled
 5741     disp(0x0);   // no displacement
 5742   %}
 5743 %}
 5744 
 5745 // Indirect Memory Times Scale Plus Index Register
 5746 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5747 %{
 5748   constraint(ALLOC_IN_RC(ptr_reg));
 5749   match(AddP reg (LShiftL lreg scale));  // the constant shift amount becomes the addressing-mode scale
 5750 
 5751   op_cost(10);
 5752   format %{"[$reg + $lreg << $scale]" %}
 5753   interface(MEMORY_INTER) %{
 5754     base($reg);
 5755     index($lreg);
 5756     scale($scale);
 5757     disp(0x0);  // no displacement
 5758   %}
 5759 %}
 5760 
 5761 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5762 %{
 5763   constraint(ALLOC_IN_RC(ptr_reg));
 5764   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // in(3)->in(1) is presumably the ConvI2L node: require a non-negative lower bound
 5765   match(AddP reg (LShiftL (ConvI2L idx) scale));  // the int register is used directly as the index
 5766 
 5767   op_cost(10);
 5768   format %{"[$reg + pos $idx << $scale]" %}
 5769   interface(MEMORY_INTER) %{
 5770     base($reg);
 5771     index($idx);
 5772     scale($scale);
 5773     disp(0x0);  // no displacement
 5774   %}
 5775 %}
 5776 
 5777 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5778 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5779 %{
 5780   constraint(ALLOC_IN_RC(ptr_reg));
 5781   match(AddP (AddP reg (LShiftL lreg scale)) off);  // inner AddP adds the scaled index, outer AddP adds the constant offset
 5782 
 5783   op_cost(10);
 5784   format %{"[$reg + $off + $lreg << $scale]" %}
 5785   interface(MEMORY_INTER) %{
 5786     base($reg);
 5787     index($lreg);
 5788     scale($scale);
 5789     disp($off);
 5790   %}
 5791 %}
 5792 
 5793 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5794 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5795 %{
 5796   constraint(ALLOC_IN_RC(ptr_reg));
 5797   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // in(2)->in(3) is presumably the ConvI2L of the inner AddP: require a non-negative lower bound
 5798   match(AddP (AddP reg (ConvI2L idx)) off);  // the int register is used directly as the index
 5799 
 5800   op_cost(10);
 5801   format %{"[$reg + $off + $idx]" %}
 5802   interface(MEMORY_INTER) %{
 5803     base($reg);
 5804     index($idx);
 5805     scale(0x0);  // index is used unscaled
 5806     disp($off);
 5807   %}
 5808 %}
 5809 
 5810 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5811 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5812 %{
 5813   constraint(ALLOC_IN_RC(ptr_reg));
 5814   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // walks inner AddP -> LShiftL -> (presumably) ConvI2L: require a non-negative lower bound
 5815   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5816 
 5817   op_cost(10);
 5818   format %{"[$reg + $off + $idx << $scale]" %}
 5819   interface(MEMORY_INTER) %{
 5820     base($reg);
 5821     index($idx);
 5822     scale($scale);
 5823     disp($off);
 5824   %}
 5825 %}
 5826 
 5827 // Indirect Narrow Oop Operand: folds the DecodeN into the address as R12 + (narrow << 3)
 5828 operand indCompressedOop(rRegN reg) %{
 5829   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));  // only when the compressed-oop shift equals the times_8 scale
 5830   constraint(ALLOC_IN_RC(ptr_reg));
 5831   match(DecodeN reg);
 5832 
 5833   op_cost(10);
 5834   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 5835   interface(MEMORY_INTER) %{
 5836     base(0xc); // R12
 5837     index($reg);  // the narrow value itself serves as the index
 5838     scale(0x3);   // scale 3, printed as "<< 3" in the format above
 5839     disp(0x0);
 5840   %}
 5841 %}
 5842 
 5843 // Indirect Narrow Oop Plus Offset Operand
 5844 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
 5845 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5846 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5847   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));  // only when the compressed-oop shift equals the times_8 scale
 5848   constraint(ALLOC_IN_RC(ptr_reg));
 5849   match(AddP (DecodeN reg) off);
 5850 
 5851   op_cost(10);
 5852   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5853   interface(MEMORY_INTER) %{
 5854     base(0xc); // R12
 5855     index($reg);  // the narrow value itself serves as the index
 5856     scale(0x3);   // scale 3, printed as "<< 3" in the format above
 5857     disp($off);
 5858   %}
 5859 %}
 5860 
 5861 // Indirect Memory Operand (narrow base): the narrow register is used directly as the base
 5862 operand indirectNarrow(rRegN reg)
 5863 %{
 5864   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 5865   constraint(ALLOC_IN_RC(ptr_reg));
 5866   match(DecodeN reg);
 5867 
 5868   format %{ "[$reg]" %}
 5869   interface(MEMORY_INTER) %{
 5870     base($reg);
 5871     index(0x4);  // no index
 5872     scale(0x0);
 5873     disp(0x0);
 5874   %}
 5875 %}
 5876 
 5877 // Indirect Memory Plus Short Offset Operand (narrow base)
 5878 operand indOffset8Narrow(rRegN reg, immL8 off)
 5879 %{
 5880   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 5881   constraint(ALLOC_IN_RC(ptr_reg));
 5882   match(AddP (DecodeN reg) off);
 5883 
 5884   format %{ "[$reg + $off (8-bit)]" %}
 5885   interface(MEMORY_INTER) %{
 5886     base($reg);
 5887     index(0x4);  // no index
 5888     scale(0x0);
 5889     disp($off);
 5890   %}
 5891 %}
 5892 
 5893 // Indirect Memory Plus Long Offset Operand (narrow base)
 5894 operand indOffset32Narrow(rRegN reg, immL32 off)
 5895 %{
 5896   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 5897   constraint(ALLOC_IN_RC(ptr_reg));
 5898   match(AddP (DecodeN reg) off);
 5899 
 5900   format %{ "[$reg + $off (32-bit)]" %}
 5901   interface(MEMORY_INTER) %{
 5902     base($reg);
 5903     index(0x4);  // no index
 5904     scale(0x0);
 5905     disp($off);
 5906   %}
 5907 %}
 5908 
 5909 // Indirect Memory Plus Index Register Plus Offset Operand (narrow base)
 5910 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5911 %{
 5912   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 5913   constraint(ALLOC_IN_RC(ptr_reg));
 5914   match(AddP (AddP (DecodeN reg) lreg) off);
 5915 
 5916   op_cost(10);
 5917   format %{"[$reg + $off + $lreg]" %}
 5918   interface(MEMORY_INTER) %{
 5919     base($reg);
 5920     index($lreg);
 5921     scale(0x0);  // index is used unscaled
 5922     disp($off);
 5923   %}
 5924 %}
 5925 
 5926 // Indirect Memory Plus Index Register Operand (narrow base, no offset)
 5927 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5928 %{
 5929   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 5930   constraint(ALLOC_IN_RC(ptr_reg));
 5931   match(AddP (DecodeN reg) lreg);
 5932 
 5933   op_cost(10);
 5934   format %{"[$reg + $lreg]" %}
 5935   interface(MEMORY_INTER) %{
 5936     base($reg);
 5937     index($lreg);
 5938     scale(0x0);  // index is used unscaled
 5939     disp(0x0);   // no displacement
 5940   %}
 5941 %}
 5942 
 5943 // Indirect Memory Times Scale Plus Index Register (narrow base)
 5944 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5945 %{
 5946   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 5947   constraint(ALLOC_IN_RC(ptr_reg));
 5948   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5949 
 5950   op_cost(10);
 5951   format %{"[$reg + $lreg << $scale]" %}
 5952   interface(MEMORY_INTER) %{
 5953     base($reg);
 5954     index($lreg);
 5955     scale($scale);
 5956     disp(0x0);  // no displacement
 5957   %}
 5958 %}
 5959 
 5960 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand (narrow base)
 5961 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5962 %{
 5963   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 5964   constraint(ALLOC_IN_RC(ptr_reg));
 5965   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5966 
 5967   op_cost(10);
 5968   format %{"[$reg + $off + $lreg << $scale]" %}
 5969   interface(MEMORY_INTER) %{
 5970     base($reg);
 5971     index($lreg);
 5972     scale($scale);
 5973     disp($off);
 5974   %}
 5975 %}
 5976 
 5977 // Indirect Memory Plus Positive Index Register Plus Offset Operand (narrow base)
 5978 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5979 %{
 5980   constraint(ALLOC_IN_RC(ptr_reg));
 5981   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // zero oop shift, and the (presumably ConvI2L) index has a non-negative lower bound
 5982   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5983 
 5984   op_cost(10);
 5985   format %{"[$reg + $off + $idx]" %}
 5986   interface(MEMORY_INTER) %{
 5987     base($reg);
 5988     index($idx);
 5989     scale(0x0);  // index is used unscaled
 5990     disp($off);
 5991   %}
 5992 %}
 5993 
 5994 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (narrow base)
 5995 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5996 %{
 5997   constraint(ALLOC_IN_RC(ptr_reg));
 5998   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero oop shift, and the (presumably ConvI2L) index has a non-negative lower bound
 5999   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 6000 
 6001   op_cost(10);
 6002   format %{"[$reg + $off + $idx << $scale]" %}
 6003   interface(MEMORY_INTER) %{
 6004     base($reg);
 6005     index($idx);
 6006     scale($scale);
 6007     disp($off);
 6008   %}
 6009 %}
 6010 
 6011 //----------Special Memory Operands--------------------------------------------
 6012 // Stack Slot Operand - This operand is used for loading and storing temporary
 6013 //                      values on the stack where a match requires a value to
 6014 //                      flow through memory.
 6015 operand stackSlotP(sRegP reg)
 6016 %{
 6017   constraint(ALLOC_IN_RC(stack_slots));  // pointer-typed stack slot (parameter is an sRegP)
 6018   // No match rule because this operand is only generated in matching
 6019 
 6020   format %{ "[$reg]" %}
 6021   interface(MEMORY_INTER) %{
 6022     base(0x4);   // RSP
 6023     index(0x4);  // No Index
 6024     scale(0x0);  // No Scale
 6025     disp($reg);  // Stack Offset
 6026   %}
 6027 %}
 6028 
 6029 operand stackSlotI(sRegI reg)
 6030 %{
 6031   constraint(ALLOC_IN_RC(stack_slots));  // int-typed stack slot (parameter is an sRegI)
 6032   // No match rule because this operand is only generated in matching
 6033 
 6034   format %{ "[$reg]" %}
 6035   interface(MEMORY_INTER) %{
 6036     base(0x4);   // RSP
 6037     index(0x4);  // No Index
 6038     scale(0x0);  // No Scale
 6039     disp($reg);  // Stack Offset
 6040   %}
 6041 %}
 6042 
 6043 operand stackSlotF(sRegF reg)
 6044 %{
 6045   constraint(ALLOC_IN_RC(stack_slots));  // float-typed stack slot (parameter is an sRegF)
 6046   // No match rule because this operand is only generated in matching
 6047 
 6048   format %{ "[$reg]" %}
 6049   interface(MEMORY_INTER) %{
 6050     base(0x4);   // RSP
 6051     index(0x4);  // No Index
 6052     scale(0x0);  // No Scale
 6053     disp($reg);  // Stack Offset
 6054   %}
 6055 %}
 6056 
 6057 operand stackSlotD(sRegD reg)
 6058 %{
 6059   constraint(ALLOC_IN_RC(stack_slots));  // double-typed stack slot (parameter is an sRegD)
 6060   // No match rule because this operand is only generated in matching
 6061 
 6062   format %{ "[$reg]" %}
 6063   interface(MEMORY_INTER) %{
 6064     base(0x4);   // RSP
 6065     index(0x4);  // No Index
 6066     scale(0x0);  // No Scale
 6067     disp($reg);  // Stack Offset
 6068   %}
 6069 %}
 6070 operand stackSlotL(sRegL reg)
 6071 %{
 6072   constraint(ALLOC_IN_RC(stack_slots));  // long-typed stack slot (parameter is an sRegL)
 6073   // No match rule because this operand is only generated in matching
 6074 
 6075   format %{ "[$reg]" %}
 6076   interface(MEMORY_INTER) %{
 6077     base(0x4);   // RSP
 6078     index(0x4);  // No Index
 6079     scale(0x0);  // No Scale
 6080     disp($reg);  // Stack Offset
 6081   %}
 6082 %}
 6083 
 6084 //----------Conditional Branch Operands----------------------------------------
 6085 // Comparison Op  - This is the operation of the comparison, and is limited to
 6086 //                  the following set of codes:
 6087 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6088 //
 6089 // Other attributes of the comparison, such as unsignedness, are specified
 6090 // by the comparison instruction that sets a condition code flags register.
 6091 // That result is represented by a flags operand whose subtype is appropriate
 6092 // to the unsignedness (etc.) of the comparison.
 6093 //
 6094 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6095 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6096 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 6097 
 6098 // Comparison Code (signed): each entry pairs a condition-code value with its mnemonic suffix
 6099 operand cmpOp()
 6100 %{
 6101   match(Bool);  // no predicate: any Bool test can use the signed l/ge/le/g codes below
 6102 
 6103   format %{ "" %}
 6104   interface(COND_INTER) %{
 6105     equal(0x4, "e");
 6106     not_equal(0x5, "ne");
 6107     less(0xc, "l");
 6108     greater_equal(0xd, "ge");
 6109     less_equal(0xe, "le");
 6110     greater(0xf, "g");
 6111     overflow(0x0, "o");
 6112     no_overflow(0x1, "no");
 6113   %}
 6114 %}
 6115 
 6116 // Comparison Code, unsigned compare.  Used by FP also, with
 6117 // C2 (unordered) turned into GT or LT already.  The other bits
 6118 // C0 and C3 are turned into Carry & Zero flags.
 6119 operand cmpOpU()
 6120 %{
 6121   match(Bool);  // same shape as cmpOp, but ordering tests use the unsigned b/ae/be/a codes
 6122 
 6123   format %{ "" %}
 6124   interface(COND_INTER) %{
 6125     equal(0x4, "e");
 6126     not_equal(0x5, "ne");
 6127     less(0x2, "b");
 6128     greater_equal(0x3, "ae");
 6129     less_equal(0x6, "be");
 6130     greater(0x7, "a");
 6131     overflow(0x0, "o");
 6132     no_overflow(0x1, "no");
 6133   %}
 6134 %}
 6135 
 6136 
 6137 // Floating comparisons that don't require any fixup for the unordered case.
 6138 // If both inputs of the comparison are the same, ZF is always set so we
 6139 // don't need to use cmpOpUCF2 for eq/ne
 6140 operand cmpOpUCF() %{
 6141   match(Bool);  // unlike cmpOpU, equal/not_equal below map to the parity codes np/p instead of e/ne
 6142   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6143             (n->as_Bool()->_test._test == BoolTest::lt ||
 6144              n->as_Bool()->_test._test == BoolTest::ge ||
 6145              n->as_Bool()->_test._test == BoolTest::le ||
 6146              n->as_Bool()->_test._test == BoolTest::gt ||
 6147              n->in(1)->in(1) == n->in(1)->in(2)));
 6148   format %{ "" %}
 6149   interface(COND_INTER) %{
 6150     equal(0xb, "np");
 6151     not_equal(0xa, "p");
 6152     less(0x2, "b");
 6153     greater_equal(0x3, "ae");
 6154     less_equal(0x6, "be");
 6155     greater(0x7, "a");
 6156     overflow(0x0, "o");
 6157     no_overflow(0x1, "no");
 6158   %}
 6159 %}
 6160 
 6161 
 6162 // Floating comparisons that can be fixed up with extra conditional jumps:
 6163 // eq/ne tests whose two compare inputs are different nodes (the cases cmpOpUCF excludes)
 6164 operand cmpOpUCF2() %{
 6165   match(Bool);
 6166   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6167             (n->as_Bool()->_test._test == BoolTest::ne ||
 6168              n->as_Bool()->_test._test == BoolTest::eq) &&
 6169             n->in(1)->in(1) != n->in(1)->in(2));
 6170   format %{ "" %}
 6171   interface(COND_INTER) %{
 6172     equal(0x4, "e");
 6173     not_equal(0x5, "ne");
 6174     less(0x2, "b");
 6175     greater_equal(0x3, "ae");
 6176     less_equal(0x6, "be");
 6177     greater(0x7, "a");
 6178     overflow(0x0, "o");
 6179     no_overflow(0x1, "no");
 6180   %}
 6181 %}
 6181 
 6182 
 6183 // Floating point comparisons that set condition flags to test more directly,
 6184 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
 6185 // are used for L (<) and LE (<=) conditions. It's important to convert these
 6186 // latter conditions to ones that use unsigned tests before passing into an
 6187 // instruction because the preceding comparison might be based on a three way
 6188 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
 6189 operand cmpOpUCFE()
 6190 %{
 6191   match(Bool);  // NOTE(review): the interface below encodes less/less_equal with the unsigned b/be codes - confirm this is the intended "converted" form
 6192   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6193             (n->as_Bool()->_test._test == BoolTest::ne ||
 6194              n->as_Bool()->_test._test == BoolTest::eq ||
 6195              n->as_Bool()->_test._test == BoolTest::lt ||
 6196              n->as_Bool()->_test._test == BoolTest::ge ||
 6197              n->as_Bool()->_test._test == BoolTest::le ||
 6198              n->as_Bool()->_test._test == BoolTest::gt));
 6199 
 6200   format %{ "" %}
 6201   interface(COND_INTER) %{
 6202     equal(0x4, "e");
 6203     not_equal(0x5, "ne");
 6204     less(0x2, "b");
 6205     greater_equal(0x3, "ae");
 6206     less_equal(0x6, "be");
 6207     greater(0x7, "a");
 6208     overflow(0x0, "o");
 6209     no_overflow(0x1, "no");
 6210   %}
 6211 %}
 6212 
 6213 // Operands for bound floating point register arguments
 6214 operand rxmm0() %{
 6215   constraint(ALLOC_IN_RC(xmm0_reg));  // VecX operand pinned to the xmm0_reg class
 6216   match(VecX);
 6217   format%{%}
 6218   interface(REG_INTER);
 6219 %}
 6220 
 6221 // Vectors
 6222 
 6223 // Dummy generic vector class. Should be used for all vector operands.
 6224 // Replaced with vec[SDXYZ] during post-selection pass.
      // Matches any of the five vector ideal types (VecS, VecD, VecX, VecY, VecZ)
      // and uses the "dynamic" register class as its allocation constraint.
 6225 operand vec() %{
 6226   constraint(ALLOC_IN_RC(dynamic));
 6227   match(VecX);
 6228   match(VecY);
 6229   match(VecZ);
 6230   match(VecS);
 6231   match(VecD);
 6232
 6233   format %{ %}
 6234   interface(REG_INTER);
 6235 %}
 6236 
 6237 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6238 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6239 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6240 // runtime code generation via reg_class_dynamic.
      // Like vec, it matches all five vector ideal types and uses the "dynamic"
      // register class as its allocation constraint.
 6241 operand legVec() %{
 6242   constraint(ALLOC_IN_RC(dynamic));
 6243   match(VecX);
 6244   match(VecY);
 6245   match(VecZ);
 6246   match(VecS);
 6247   match(VecD);
 6248
 6249   format %{ %}
 6250   interface(REG_INTER);
 6251 %}
 6252 
 6253 // Replaces vec during post-selection cleanup. See above.
      // Matches VecS; allocated from the vectors_reg_vlbwdq register class.
 6254 operand vecS() %{
 6255   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6256   match(VecS);
 6257
 6258   format %{ %}
 6259   interface(REG_INTER);
 6260 %}
 6261 
 6262 // Replaces legVec during post-selection cleanup. See above.
      // Matches VecS; allocated from the vectors_reg_legacy register class.
 6263 operand legVecS() %{
 6264   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6265   match(VecS);
 6266
 6267   format %{ %}
 6268   interface(REG_INTER);
 6269 %}
 6270 
 6271 // Replaces vec during post-selection cleanup. See above.
      // Matches VecD; allocated from the vectord_reg_vlbwdq register class.
 6272 operand vecD() %{
 6273   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6274   match(VecD);
 6275
 6276   format %{ %}
 6277   interface(REG_INTER);
 6278 %}
 6279 
 6280 // Replaces legVec during post-selection cleanup. See above.
      // Matches VecD; allocated from the vectord_reg_legacy register class.
 6281 operand legVecD() %{
 6282   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6283   match(VecD);
 6284
 6285   format %{ %}
 6286   interface(REG_INTER);
 6287 %}
 6288 
 6289 // Replaces vec during post-selection cleanup. See above.
      // Matches VecX; allocated from the vectorx_reg_vlbwdq register class.
 6290 operand vecX() %{
 6291   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6292   match(VecX);
 6293
 6294   format %{ %}
 6295   interface(REG_INTER);
 6296 %}
 6297 
 6298 // Replaces legVec during post-selection cleanup. See above.
      // Matches VecX; allocated from the vectorx_reg_legacy register class.
 6299 operand legVecX() %{
 6300   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6301   match(VecX);
 6302
 6303   format %{ %}
 6304   interface(REG_INTER);
 6305 %}
 6306 
 6307 // Replaces vec during post-selection cleanup. See above.
      // Matches VecY; allocated from the vectory_reg_vlbwdq register class.
 6308 operand vecY() %{
 6309   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6310   match(VecY);
 6311
 6312   format %{ %}
 6313   interface(REG_INTER);
 6314 %}
 6315 
 6316 // Replaces legVec during post-selection cleanup. See above.
      // Matches VecY; allocated from the vectory_reg_legacy register class.
 6317 operand legVecY() %{
 6318   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6319   match(VecY);
 6320
 6321   format %{ %}
 6322   interface(REG_INTER);
 6323 %}
 6324 
 6325 // Replaces vec during post-selection cleanup. See above.
      // Matches VecZ; allocated from the vectorz_reg register class.
 6326 operand vecZ() %{
 6327   constraint(ALLOC_IN_RC(vectorz_reg));
 6328   match(VecZ);
 6329
 6330   format %{ %}
 6331   interface(REG_INTER);
 6332 %}
 6333 
 6334 // Replaces legVec during post-selection cleanup. See above.
      // Matches VecZ; allocated from the vectorz_reg_legacy register class.
 6335 operand legVecZ() %{
 6336   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6337   match(VecZ);
 6338
 6339   format %{ %}
 6340   interface(REG_INTER);
 6341 %}
 6342 
 6343 //----------OPERAND CLASSES----------------------------------------------------
 6344 // Operand Classes are groups of operands that are used to simplify
 6345 // instruction definitions by not requiring the AD writer to specify separate
 6346 // instructions for every form of operand when the instruction accepts
 6347 // multiple operand types with the same basic encoding and format.  The classic
 6348 // case of this is memory operands.
 6349
      // "memory" groups every addressing-mode operand listed below, so that an
      // instruction declared with a memory operand accepts any of them.
 6350 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6351                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6352                indCompressedOop, indCompressedOopOffset,
 6353                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6354                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6355                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6356 
 6357 //----------PIPELINE-----------------------------------------------------------
 6358 // Rules which define the behavior of the target architectures pipeline.
 6359 pipeline %{
 6360 
 6361 //----------ATTRIBUTES---------------------------------------------------------
 6362 attributes %{
 6363   variable_size_instructions;        // Instructions are variable size
 6364   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6365   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 6366   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6367   instruction_fetch_units = 1;       // of 16 bytes
 6368 %}
 6369 
 6370 //----------RESOURCES----------------------------------------------------------
 6371 // Resources are the functional units available to the machine
 6372
 6373 // Generic P2/P3 pipeline
 6374 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6375 // 3 instructions decoded per cycle.
 6376 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 6377 // 3 ALU op, only ALU0 handles mul instructions.
      // DECODE, MEM and ALU are unions of the individual units, used by the pipe
      // classes when any decoder / memory unit / ALU will do.
      // NOTE(review): three memory units (MS0..MS2) are declared here although the
      // text above says 2 load/store ops per cycle -- confirm.
 6378 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6379            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6380            BR, FPU,
 6381            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6382 
 6383 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6384 // Pipeline Description specifies the stages in the machine's pipeline
 6385
 6386 // Generic P2/P3 pipeline
      // Six stages, S0 through S5; the pipe classes refer to them by name.
 6387 pipe_desc(S0, S1, S2, S3, S4, S5);
 6388 
 6389 //----------PIPELINE CLASSES---------------------------------------------------
 6390 // Pipeline Classes describe the stages in which input and output are
 6391 // referenced by the hardware pipeline.
 6392 
 6393 // Naming convention: ialu or fpu
 6394 // Then: _reg
 6395 // Then: _reg if there is a 2nd register
 6396 // Then: _long if it's a pair of instructions implementing a long
 6397 // Then: _fat if it requires the big decoder
 6398 //   Or: _mem if it requires the big decoder and a memory unit.
 6399 
 6400 // Integer ALU reg operation
      // Single instruction: dst is read in S3 and written in S4.
 6401 pipe_class ialu_reg(rRegI dst)
 6402 %{
 6403     single_instruction;
 6404     dst    : S4(write);
 6405     dst    : S3(read);
 6406     DECODE : S0;        // any decoder
 6407     ALU    : S3;        // any alu
 6408 %}
 6409 
 6410 // Long ALU reg operation
      // Counted as two instructions: dst is read in S3 and written in S4.
 6411 pipe_class ialu_reg_long(rRegL dst)
 6412 %{
 6413     instruction_count(2);
 6414     dst    : S4(write);
 6415     dst    : S3(read);
 6416     DECODE : S0(2);     // any decoder, count of 2
 6417     ALU    : S3(2);     // any alu, count of 2
 6418 %}
 6419 
 6420 // Integer ALU reg operation using big decoder
      // Same operand timing as ialu_reg, but only decoder D0 may be used.
 6421 pipe_class ialu_reg_fat(rRegI dst)
 6422 %{
 6423     single_instruction;
 6424     dst    : S4(write);
 6425     dst    : S3(read);
 6426     D0     : S0;        // big decoder only
 6427     ALU    : S3;        // any alu
 6428 %}
 6429 
 6430 // Integer ALU reg-reg operation
      // Single instruction: src is read in S3, dst is written in S4.
 6431 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6432 %{
 6433     single_instruction;
 6434     dst    : S4(write);
 6435     src    : S3(read);
 6436     DECODE : S0;        // any decoder
 6437     ALU    : S3;        // any alu
 6438 %}
 6439 
 6440 // Integer ALU reg-reg operation using big decoder
      // NOTE(review): despite the _reg_reg name, src is declared as a memory
      // operand and no MEM unit is requested -- confirm this is intended.
 6441 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6442 %{
 6443     single_instruction;
 6444     dst    : S4(write);
 6445     src    : S3(read);
 6446     D0     : S0;        // big decoder only
 6447     ALU    : S3;        // any alu
 6448 %}
 6449 
 6450 // Integer ALU reg-mem operation
      // Single instruction: mem is read in S3 (using a memory unit), the ALU is
      // used in S4 and dst is written in S5.
 6451 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6452 %{
 6453     single_instruction;
 6454     dst    : S5(write);
 6455     mem    : S3(read);
 6456     D0     : S0;        // big decoder only
 6457     ALU    : S4;        // any alu
 6458     MEM    : S3;        // any mem
 6459 %}
 6460 
 6461 // Integer mem operation (prefetch)
      // Single instruction with only a memory operand; no ALU is requested.
 6462 pipe_class ialu_mem(memory mem)
 6463 %{
 6464     single_instruction;
 6465     mem    : S3(read);
 6466     D0     : S0;        // big decoder only
 6467     MEM    : S3;        // any mem
 6468 %}
 6469 
 6470 // Integer Store to Memory
      // Single instruction: mem (the address) is read in S3, src is read in S5.
 6471 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6472 %{
 6473     single_instruction;
 6474     mem    : S3(read);
 6475     src    : S5(read);
 6476     D0     : S0;        // big decoder only
 6477     ALU    : S4;        // any alu
 6478     MEM    : S3;        // any mem
 6479 %}
 6480 
 6481 // // Long Store to Memory
 6482 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6483 // %{
 6484 //     instruction_count(2);
 6485 //     mem    : S3(read);
 6486 //     src    : S5(read);
 6487 //     D0     : S0(2);          // big decoder only; twice
 6488 //     ALU    : S4(2);     // any 2 alus
 6489 //     MEM    : S3(2);  // Both mems
 6490 // %}
 6491 
 6492 // Integer Store to Memory (form without a register source operand)
      // Same resource usage as ialu_mem_reg, minus the src register read.
 6493 pipe_class ialu_mem_imm(memory mem)
 6494 %{
 6495     single_instruction;
 6496     mem    : S3(read);
 6497     D0     : S0;        // big decoder only
 6498     ALU    : S4;        // any alu
 6499     MEM    : S3;        // any mem
 6500 %}
 6501 
 6502 // Integer ALU0 reg-reg operation
      // Like a reg-reg ALU op, but restricted to decoder D0 and to unit ALU0.
 6503 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6504 %{
 6505     single_instruction;
 6506     dst    : S4(write);
 6507     src    : S3(read);
 6508     D0     : S0;        // Big decoder only
 6509     ALU0   : S3;        // only alu0
 6510 %}
 6511 
 6512 // Integer ALU0 reg-mem operation
      // Like a reg-mem ALU op, but the ALU stage is restricted to unit ALU0.
 6513 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6514 %{
 6515     single_instruction;
 6516     dst    : S5(write);
 6517     mem    : S3(read);
 6518     D0     : S0;        // big decoder only
 6519     ALU0   : S4;        // ALU0 only
 6520     MEM    : S3;        // any mem
 6521 %}
 6522 
 6523 // Integer ALU reg-reg operation that writes the flags register
      // Single instruction: src1 and src2 are read in S3, cr is written in S4.
 6524 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6525 %{
 6526     single_instruction;
 6527     cr     : S4(write);
 6528     src1   : S3(read);
 6529     src2   : S3(read);
 6530     DECODE : S0;        // any decoder
 6531     ALU    : S3;        // any alu
 6532 %}
 6533 
 6534 // Integer ALU reg-imm operation that writes the flags register
      // Single instruction: src1 is read in S3, cr is written in S4.
 6535 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6536 %{
 6537     single_instruction;
 6538     cr     : S4(write);
 6539     src1   : S3(read);
 6540     DECODE : S0;        // any decoder
 6541     ALU    : S3;        // any alu
 6542 %}
 6543 
 6544 // Integer ALU reg-mem operation that writes the flags register
      // Single instruction: src1 and the memory operand src2 are read in S3,
      // cr is written in S4.
 6545 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6546 %{
 6547     single_instruction;
 6548     cr     : S4(write);
 6549     src1   : S3(read);
 6550     src2   : S3(read);
 6551     D0     : S0;        // big decoder only
 6552     ALU    : S4;        // any alu
 6553     MEM    : S3;        // any mem
 6554 %}
 6555 
 6556 // Conditional move reg-reg
      // Counted as four instructions; p and q are read in S3 and y in S4. No
      // operand write is declared.
      // NOTE(review): the name suggests a compare-less-than sequence rather than a
      // plain conditional move -- confirm and adjust the header if so.
 6557 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6558 %{
 6559     instruction_count(4);
 6560     y      : S4(read);
 6561     q      : S3(read);
 6562     p      : S3(read);
 6563     DECODE : S0(4);     // any decoder
 6564 %}
 6565 
 6566 // Conditional move reg-reg
      // Single instruction: src and the flags cr are read in S3, dst is written
      // in S4. No ALU resource is requested.
 6567 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6568 %{
 6569     single_instruction;
 6570     dst    : S4(write);
 6571     src    : S3(read);
 6572     cr     : S3(read);
 6573     DECODE : S0;        // any decoder
 6574 %}
 6575 
 6576 // Conditional move reg-mem
      // Single instruction: the memory operand src and the flags cr are read in
      // S3, dst is written in S4.
 6577 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6578 %{
 6579     single_instruction;
 6580     dst    : S4(write);
 6581     src    : S3(read);
 6582     cr     : S3(read);
 6583     DECODE : S0;        // any decoder
 6584     MEM    : S3;        // any mem
 6585 %}
 6586 
 6587 // Conditional move reg-reg long
      // NOTE(review): declared single_instruction while DECODE is requested with
      // a count of 2 -- confirm that the combination is intended.
 6588 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6589 %{
 6590     single_instruction;
 6591     dst    : S4(write);
 6592     src    : S3(read);
 6593     cr     : S3(read);
 6594     DECODE : S0(2);     // any decoder, count of 2
 6595 %}
 6596 
 6597 // Float reg operation (single register operand)
      // Counted as two instructions; dst is read in S3 and no write is declared.
 6598 pipe_class fpu_reg(regD dst)
 6599 %{
 6600     instruction_count(2);
 6601     dst    : S3(read);
 6602     DECODE : S0(2);     // any decoder, count of 2
 6603     FPU    : S3;
 6604 %}
 6605 
 6606 // Float reg-reg operation
      // Counted as two instructions: src is read in S3, dst is written in S4.
 6607 pipe_class fpu_reg_reg(regD dst, regD src)
 6608 %{
 6609     instruction_count(2);
 6610     dst    : S4(write);
 6611     src    : S3(read);
 6612     DECODE : S0(2);     // any decoder, count of 2
 6613     FPU    : S3;
 6614 %}
 6615 
 6616 // Float reg-reg-reg operation
      // Counted as three instructions: src1 and src2 are read in S3, dst is
      // written in S4.
 6617 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6618 %{
 6619     instruction_count(3);
 6620     dst    : S4(write);
 6621     src1   : S3(read);
 6622     src2   : S3(read);
 6623     DECODE : S0(3);     // any decoder, count of 3
 6624     FPU    : S3(2);
 6625 %}
 6626 
 6627 // Float reg-reg-reg-reg operation
      // Counted as four instructions: src1..src3 are read in S3, dst is written
      // in S4.
 6628 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6629 %{
 6630     instruction_count(4);
 6631     dst    : S4(write);
 6632     src1   : S3(read);
 6633     src2   : S3(read);
 6634     src3   : S3(read);
 6635     DECODE : S0(4);     // any decoder, count of 4 (one per instruction)
 6636     FPU    : S3(2);
 6637 %}
 6638 
 6639 // Float reg-mem-reg-reg operation
      // Counted as four instructions: the memory operand src1 and the registers
      // src2/src3 are read in S3, dst is written in S4.
 6640 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6641 %{
 6642     instruction_count(4);
 6643     dst    : S4(write);
 6644     src1   : S3(read);
 6645     src2   : S3(read);
 6646     src3   : S3(read);
 6647     DECODE : S1(3);     // any decoder, count of 3, in stage S1
 6648     D0     : S0;        // Big decoder only
 6649     FPU    : S3(2);
 6650     MEM    : S3;        // any mem
 6651 %}
 6652 
 6653 // Float reg-mem operation
      // Counted as two instructions: mem is read in S3, the FPU is used in S4
      // and dst is written in S5.
 6654 pipe_class fpu_reg_mem(regD dst, memory mem)
 6655 %{
 6656     instruction_count(2);
 6657     dst    : S5(write);
 6658     mem    : S3(read);
 6659     D0     : S0;        // big decoder only
 6660     DECODE : S1;        // any decoder for FPU POP
 6661     FPU    : S4;
 6662     MEM    : S3;        // any mem
 6663 %}
 6664 
 6665 // Float reg-reg-mem operation
      // Counted as three instructions: src1 and mem are read in S3, the FPU is
      // used in S4 and dst is written in S5.
 6666 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6667 %{
 6668     instruction_count(3);
 6669     dst    : S5(write);
 6670     src1   : S3(read);
 6671     mem    : S3(read);
 6672     D0     : S0;        // big decoder only
 6673     DECODE : S1(2);     // any decoder for FPU POP
 6674     FPU    : S4;
 6675     MEM    : S3;        // any mem
 6676 %}
 6677 
 6678 // Float mem-reg operation
      // Counted as two instructions: mem (the address) is read in S3 and src in
      // S5. Here the generic decoder is used in S0 and D0 in S1.
 6679 pipe_class fpu_mem_reg(memory mem, regD src)
 6680 %{
 6681     instruction_count(2);
 6682     src    : S5(read);
 6683     mem    : S3(read);
 6684     DECODE : S0;        // any decoder for FPU PUSH
 6685     D0     : S1;        // big decoder only
 6686     FPU    : S4;
 6687     MEM    : S3;        // any mem
 6688 %}
 6689 
 6690 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6691 %{
 6692     instruction_count(3);  // Float mem-reg-reg operation, three instructions
 6693     src1   : S3(read);
 6694     src2   : S3(read);
 6695     mem    : S3(read);
 6696     DECODE : S0(2);     // any decoder for FPU PUSH
 6697     D0     : S1;        // big decoder only
 6698     FPU    : S4;
 6699     MEM    : S3;        // any mem
 6700 %}
 6701 
 6702 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6703 %{
 6704     instruction_count(3);  // Float mem-reg-mem operation, three instructions
 6705     src1   : S3(read);
 6706     src2   : S3(read);
 6707     mem    : S4(read);  // the mem operand is read one stage after the sources
 6708     DECODE : S0;        // any decoder for FPU PUSH
 6709     D0     : S0(2);     // big decoder only
 6710     FPU    : S4;
 6711     MEM    : S3(2);     // any mem
 6712 %}
 6713 
 6714 pipe_class fpu_mem_mem(memory dst, memory src1)
 6715 %{
 6716     instruction_count(2);  // Float mem-mem operation, two instructions; no FPU unit requested
 6717     src1   : S3(read);
 6718     dst    : S4(read);  // dst is a memory operand, so its address is read
 6719     D0     : S0(2);     // big decoder only
 6720     MEM    : S3(2);     // any mem
 6721 %}
 6722 
 6723 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6724 %{
 6725     instruction_count(3);  // Float mem-mem-mem operation, three instructions
 6726     src1   : S3(read);
 6727     src2   : S3(read);
 6728     dst    : S4(read);  // dst is a memory operand, so its address is read
 6729     D0     : S0(3);     // big decoder only
 6730     FPU    : S4;
 6731     MEM    : S3(3);     // any mem
 6732 %}
 6733 
 6734 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6735 %{
 6736     instruction_count(3);  // Float mem-reg-constant operation, three instructions
 6737     src1   : S4(read);
 6738     mem    : S4(read);
 6739     DECODE : S0;        // any decoder for FPU PUSH
 6740     D0     : S0(2);     // big decoder only
 6741     FPU    : S4;
 6742     MEM    : S3(2);     // any mem
 6743 %}
 6744 
 6745 // Float load constant
      // Counted as two instructions: a memory unit is used in S3, the FPU in S4
      // and dst is written in S5. There is no source operand.
 6746 pipe_class fpu_reg_con(regD dst)
 6747 %{
 6748     instruction_count(2);
 6749     dst    : S5(write);
 6750     D0     : S0;        // big decoder only for the load
 6751     DECODE : S1;        // any decoder for FPU POP
 6752     FPU    : S4;
 6753     MEM    : S3;        // any mem
 6754 %}
 6755 
 6756 // Float reg-reg operation with a loaded constant
      // Counted as three instructions: src is read in S3, a memory unit is used
      // in S3, the FPU in S4 and dst is written in S5.
 6757 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6758 %{
 6759     instruction_count(3);
 6760     dst    : S5(write);
 6761     src    : S3(read);
 6762     D0     : S0;        // big decoder only for the load
 6763     DECODE : S1(2);     // any decoder for FPU POP
 6764     FPU    : S4;
 6765     MEM    : S3;        // any mem
 6766 %}
 6767 
 6768 // Unconditional branch
      // Single instruction that only requests the branch unit, in S3.
 6769 pipe_class pipe_jmp(label labl)
 6770 %{
 6771     single_instruction;
 6772     BR   : S3;
 6773 %}
 6774 
 6775 // Conditional branch
      // Single instruction: the flags cr are read in S1, the branch unit is
      // used in S3.
 6776 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6777 %{
 6778     single_instruction;
 6779     cr    : S1(read);
 6780     BR    : S3;
 6781 %}
 6782 
 6783 // Allocation idiom
      // One instruction with forced serialization and a fixed latency of 6.
      // heap_ptr is read in S3 and dst is written in S5.
 6784 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6785 %{
 6786     instruction_count(1); force_serialization;
 6787     fixed_latency(6);
 6788     heap_ptr : S3(read);
 6789     DECODE   : S0(3);
 6790     D0       : S2;
 6791     MEM      : S3;
 6792     ALU      : S3(2);
 6793     dst      : S5(write);
 6794     BR       : S5;
 6795 %}
 6796 
 6797 // Generic big/slow expanded idiom
      // Modeled as 10 instructions spanning multiple bundles, serialized, with a
      // fixed latency of 100. It takes no operands.
 6798 pipe_class pipe_slow()
 6799 %{
 6800     instruction_count(10); multiple_bundles; force_serialization;
 6801     fixed_latency(100);
 6802     D0  : S0(2);
 6803     MEM : S3(2);
 6804 %}
 6805 
 6806 // The real do-nothing guy
      // Zero instructions, no operands and no resources.
 6807 pipe_class empty()
 6808 %{
 6809     instruction_count(0);
 6810 %}
 6811 
 6812 // Define the class for the Nop node
      // MachNop is bound to the "empty" pipe class.
 6813 define
 6814 %{
 6815    MachNop = empty;
 6816 %}
 6817 
 6818 %}
 6819 
 6820 //----------INSTRUCTIONS-------------------------------------------------------
 6821 //
 6822 // match      -- States which machine-independent subtree may be replaced
 6823 //               by this instruction.
 6824 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6825 //               selection to identify a minimum cost tree of machine
 6826 //               instructions that matches a tree of machine-independent
 6827 //               instructions.
 6828 // format     -- A string providing the disassembly for this instruction.
 6829 //               The value of an instruction's operand may be inserted
 6830 //               by referring to it with a '$' prefix.
 6831 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6832 //               to within an encode class as $primary, $secondary, and $tertiary
 6833 //               respectively.  The primary opcode is commonly used to
 6834 //               indicate the type of machine instruction, while secondary
 6835 //               and tertiary are often used for prefix options or addressing
 6836 //               modes.
 6837 // ins_encode -- A list of encode classes with parameters. The encode class
 6838 //               name must have been defined in an 'enc_class' specification
 6839 //               in the encode section of the architecture description.
 6840 
 6841 // ============================================================================
 6842 
 6843 instruct ShouldNotReachHere() %{
 6844   match(Halt);  // selected for the ideal Halt node
 6845   format %{ "stop\t# ShouldNotReachHere" %}
 6846   ins_encode %{
          // Emit the stop sequence only when is_reachable() holds; otherwise no
          // code is generated for this node.
 6847     if (is_reachable()) {
            // The message is _halt_reason, passed through code_string().
 6848       const char* str = __ code_string(_halt_reason);
 6849       __ stop(str);
 6850     }
 6851   %}
 6852   ins_pipe(pipe_slow);
 6853 %}
 6854 
 6855 // ============================================================================
 6856 
 6857 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6858 // Dummy float move: regF -> vlRegF
      // The encoding is ShouldNotReachHere(), so this node is not expected to
      // reach code emission.
 6859 instruct MoveF2VL(vlRegF dst, regF src) %{
 6860   match(Set dst src);
 6861   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6862   ins_encode %{
 6863     ShouldNotReachHere();
 6864   %}
 6865   ins_pipe( fpu_reg_reg );
 6866 %}
 6867 
 6868 // Dummy float move: regF -> legRegF
      // The encoding is ShouldNotReachHere(), so this node is not expected to
      // reach code emission.
 6869 instruct MoveF2LEG(legRegF dst, regF src) %{
 6870   match(Set dst src);
 6871   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6872   ins_encode %{
 6873     ShouldNotReachHere();
 6874   %}
 6875   ins_pipe( fpu_reg_reg );
 6876 %}
 6877 
 6878 // Dummy float move: vlRegF -> regF
      // The encoding is ShouldNotReachHere(), so this node is not expected to
      // reach code emission.
 6879 instruct MoveVL2F(regF dst, vlRegF src) %{
 6880   match(Set dst src);
 6881   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6882   ins_encode %{
 6883     ShouldNotReachHere();
 6884   %}
 6885   ins_pipe( fpu_reg_reg );
 6886 %}
 6887 
 6888 // Dummy float move: legRegF -> regF
      // The encoding is ShouldNotReachHere(), so this node is not expected to
      // reach code emission.
 6889 instruct MoveLEG2F(regF dst, legRegF src) %{
 6890   match(Set dst src);
 6891   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6892   ins_encode %{
 6893     ShouldNotReachHere();
 6894   %}
 6895   ins_pipe( fpu_reg_reg );
 6896 %}
 6897 
 6898 // Dummy double move: regD -> vlRegD
      // The encoding is ShouldNotReachHere(), so this node is not expected to
      // reach code emission.
 6899 instruct MoveD2VL(vlRegD dst, regD src) %{
 6900   match(Set dst src);
 6901   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6902   ins_encode %{
 6903     ShouldNotReachHere();
 6904   %}
 6905   ins_pipe( fpu_reg_reg );
 6906 %}
 6907 
 6908 // Dummy double move: regD -> legRegD
      // The encoding is ShouldNotReachHere(), so this node is not expected to
      // reach code emission.
 6909 instruct MoveD2LEG(legRegD dst, regD src) %{
 6910   match(Set dst src);
 6911   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6912   ins_encode %{
 6913     ShouldNotReachHere();
 6914   %}
 6915   ins_pipe( fpu_reg_reg );
 6916 %}
 6917 
 6918 // Dummy double move: vlRegD -> regD
      // The encoding is ShouldNotReachHere(), so this node is not expected to
      // reach code emission.
 6919 instruct MoveVL2D(regD dst, vlRegD src) %{
 6920   match(Set dst src);
 6921   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6922   ins_encode %{
 6923     ShouldNotReachHere();
 6924   %}
 6925   ins_pipe( fpu_reg_reg );
 6926 %}
 6927 
 6928 // Dummy double move: legRegD -> regD
      // The encoding is ShouldNotReachHere(), so this node is not expected to
      // reach code emission.
 6929 instruct MoveLEG2D(regD dst, legRegD src) %{
 6930   match(Set dst src);
 6931   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6932   ins_encode %{
 6933     ShouldNotReachHere();
 6934   %}
 6935   ins_pipe( fpu_reg_reg );
 6936 %}
 6937 
 6938 //----------Load/Store/Move Instructions---------------------------------------
 6939 //----------Load Instructions--------------------------------------------------
 6940 
 6941 // Load Byte (8 bit signed)
      // Matches LoadB and emits movsbl from the memory operand into dst.
 6942 instruct loadB(rRegI dst, memory mem)
 6943 %{
 6944   match(Set dst (LoadB mem));
 6945
 6946   ins_cost(125);
 6947   format %{ "movsbl  $dst, $mem\t# byte" %}
 6948
 6949   ins_encode %{
 6950     __ movsbl($dst$$Register, $mem$$Address);
 6951   %}
 6952
 6953   ins_pipe(ialu_reg_mem);
 6954 %}
 6955 
 6956 // Load Byte (8 bit signed) into Long Register
      // Folds ConvI2L(LoadB) into a single movsbq.
 6957 instruct loadB2L(rRegL dst, memory mem)
 6958 %{
 6959   match(Set dst (ConvI2L (LoadB mem)));
 6960
 6961   ins_cost(125);
 6962   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6963
 6964   ins_encode %{
 6965     __ movsbq($dst$$Register, $mem$$Address);
 6966   %}
 6967
 6968   ins_pipe(ialu_reg_mem);
 6969 %}
 6970 
 6971 // Load Unsigned Byte (8 bit UNsigned)
      // Matches LoadUB and emits movzbl from the memory operand into dst.
 6972 instruct loadUB(rRegI dst, memory mem)
 6973 %{
 6974   match(Set dst (LoadUB mem));
 6975
 6976   ins_cost(125);
 6977   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6978
 6979   ins_encode %{
 6980     __ movzbl($dst$$Register, $mem$$Address);
 6981   %}
 6982
 6983   ins_pipe(ialu_reg_mem);
 6984 %}
 6985 
 6986 // Load Unsigned Byte (8 bit UNsigned) into Long Register
      // Folds ConvI2L(LoadUB) into a single movzbq.
 6987 instruct loadUB2L(rRegL dst, memory mem)
 6988 %{
 6989   match(Set dst (ConvI2L (LoadUB mem)));
 6990
 6991   ins_cost(125);
 6992   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6993
 6994   ins_encode %{
 6995     __ movzbq($dst$$Register, $mem$$Address);
 6996   %}
 6997
 6998   ins_pipe(ialu_reg_mem);
 6999 %}
 7000 
 7001 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
      // Folds ConvI2L(AndI(LoadUB, mask)) into a movzbq followed by an andl.
 7002 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7003   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 7004   effect(KILL cr);  // the andl below modifies the flags
 7005
 7006   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 7007             "andl    $dst, right_n_bits($mask, 8)" %}
 7008   ins_encode %{
 7009     Register Rdst = $dst$$Register;
 7010     __ movzbq(Rdst, $mem$$Address);
          // The loaded value is a zero-extended byte, so the mask is first
          // narrowed with right_n_bits(8).
 7011     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 7012   %}
 7013   ins_pipe(ialu_reg_mem);
 7014 %}
 7015 
 7016 // Load Short (16 bit signed)
      // Matches LoadS and emits movswl from the memory operand into dst.
 7017 instruct loadS(rRegI dst, memory mem)
 7018 %{
 7019   match(Set dst (LoadS mem));
 7020
 7021   ins_cost(125);
 7022   format %{ "movswl $dst, $mem\t# short" %}
 7023
 7024   ins_encode %{
 7025     __ movswl($dst$$Register, $mem$$Address);
 7026   %}
 7027
 7028   ins_pipe(ialu_reg_mem);
 7029 %}
 7030 
 7031 // Load Short (16 bit signed) to Byte (8 bit signed)
      // (LoadS << 24) >> 24 keeps only the sign-extended low byte, so a
      // sign-extending byte load from the same address is emitted instead
      // (x86 is little-endian, so the low byte is at the operand's address).
 7032 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7033   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 7034
 7035   ins_cost(125);
 7036   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 7037   ins_encode %{
 7038     __ movsbl($dst$$Register, $mem$$Address);
 7039   %}
 7040   ins_pipe(ialu_reg_mem);
 7041 %}
 7042 
 7043 // Load Short (16 bit signed) into Long Register
      // Folds ConvI2L(LoadS) into a single movswq.
 7044 instruct loadS2L(rRegL dst, memory mem)
 7045 %{
 7046   match(Set dst (ConvI2L (LoadS mem)));
 7047
 7048   ins_cost(125);
 7049   format %{ "movswq $dst, $mem\t# short -> long" %}
 7050
 7051   ins_encode %{
 7052     __ movswq($dst$$Register, $mem$$Address);
 7053   %}
 7054
 7055   ins_pipe(ialu_reg_mem);
 7056 %}
 7057 
 7058 // Load Unsigned Short/Char (16 bit UNsigned)
      // Matches LoadUS and emits movzwl from the memory operand into dst.
 7059 instruct loadUS(rRegI dst, memory mem)
 7060 %{
 7061   match(Set dst (LoadUS mem));
 7062
 7063   ins_cost(125);
 7064   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7065
 7066   ins_encode %{
 7067     __ movzwl($dst$$Register, $mem$$Address);
 7068   %}
 7069
 7070   ins_pipe(ialu_reg_mem);
 7071 %}
 7072 
 7073 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
      // (LoadUS << 24) >> 24 keeps only the sign-extended low byte, so a
      // sign-extending byte load from the same address is emitted instead.
 7074 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7075   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7076
 7077   ins_cost(125);
 7078   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7079   ins_encode %{
 7080     __ movsbl($dst$$Register, $mem$$Address);
 7081   %}
 7082   ins_pipe(ialu_reg_mem);
 7083 %}
 7084 
 7085 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
      // Folds ConvI2L(LoadUS) into a single movzwq.
 7086 instruct loadUS2L(rRegL dst, memory mem)
 7087 %{
 7088   match(Set dst (ConvI2L (LoadUS mem)));
 7089
 7090   ins_cost(125);
 7091   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7092
 7093   ins_encode %{
 7094     __ movzwq($dst$$Register, $mem$$Address);
 7095   %}
 7096
 7097   ins_pipe(ialu_reg_mem);
 7098 %}
 7099 
 7100 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
      // Masking with 0xFF keeps only the low byte, so a zero-extending byte
      // load (movzbq) from the same address replaces the load, and, and convert.
      // No ins_cost is given for this rule.
 7101 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7102   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7103
 7104   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7105   ins_encode %{
 7106     __ movzbq($dst$$Register, $mem$$Address);
 7107   %}
 7108   ins_pipe(ialu_reg_mem);
 7109 %}
 7110 
 7111 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
      // Folds ConvI2L(AndI(LoadUS, mask)) into a movzwq followed by an andl.
 7112 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7113   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7114   effect(KILL cr);  // the andl below modifies the flags
 7115
 7116   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7117             "andl    $dst, right_n_bits($mask, 16)" %}
 7118   ins_encode %{
 7119     Register Rdst = $dst$$Register;
 7120     __ movzwq(Rdst, $mem$$Address);
          // The loaded value is a zero-extended 16-bit quantity, so the mask is
          // first narrowed with right_n_bits(16).
 7121     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7122   %}
 7123   ins_pipe(ialu_reg_mem);
 7124 %}
 7125 
 7126 // Load Integer
      // Matches LoadI and emits movl from the memory operand into dst.
 7127 instruct loadI(rRegI dst, memory mem)
 7128 %{
 7129   match(Set dst (LoadI mem));
 7130
 7131   ins_cost(125);
 7132   format %{ "movl    $dst, $mem\t# int" %}
 7133
 7134   ins_encode %{
 7135     __ movl($dst$$Register, $mem$$Address);
 7136   %}
 7137
 7138   ins_pipe(ialu_reg_mem);
 7139 %}
 7140 
 7141 // Load Integer (32 bit signed) to Byte (8 bit signed)
      // (LoadI << 24) >> 24 keeps only the sign-extended low byte, so a
      // sign-extending byte load from the same address is emitted instead.
 7142 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7143   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7144
 7145   ins_cost(125);
 7146   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7147   ins_encode %{
 7148     __ movsbl($dst$$Register, $mem$$Address);
 7149   %}
 7150   ins_pipe(ialu_reg_mem);
 7151 %}
 7152 
 7153 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
      // AndI(LoadI, 255) keeps only the low byte, so a zero-extending byte load
      // from the same address is emitted instead.
 7154 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7155   match(Set dst (AndI (LoadI mem) mask));
 7156
 7157   ins_cost(125);
 7158   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7159   ins_encode %{
 7160     __ movzbl($dst$$Register, $mem$$Address);
 7161   %}
 7162   ins_pipe(ialu_reg_mem);
 7163 %}
 7164 
 7165 // Load Integer (32 bit signed) to Short (16 bit signed)
      // (LoadI << 16) >> 16 keeps only the sign-extended low 16 bits, so a
      // sign-extending 16-bit load from the same address is emitted instead.
 7166 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7167   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7168
 7169   ins_cost(125);
 7170   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7171   ins_encode %{
 7172     __ movswl($dst$$Register, $mem$$Address);
 7173   %}
 7174   ins_pipe(ialu_reg_mem);
 7175 %}
 7176 
 7177 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
      // AndI(LoadI, 65535) keeps only the low 16 bits, so a zero-extending
      // 16-bit load from the same address is emitted instead.
 7178 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7179   match(Set dst (AndI (LoadI mem) mask));
 7180
 7181   ins_cost(125);
 7182   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7183   ins_encode %{
 7184     __ movzwl($dst$$Register, $mem$$Address);
 7185   %}
 7186   ins_pipe(ialu_reg_mem);
 7187 %}
 7188 
 7189 // Load Integer into Long Register
      // Folds ConvI2L(LoadI) into a single sign-extending movslq.
 7190 instruct loadI2L(rRegL dst, memory mem)
 7191 %{
 7192   match(Set dst (ConvI2L (LoadI mem)));
 7193
 7194   ins_cost(125);
 7195   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7196
 7197   ins_encode %{
 7198     __ movslq($dst$$Register, $mem$$Address);
 7199   %}
 7200
 7201   ins_pipe(ialu_reg_mem);
 7202 %}
 7203 
 7204 // Load Integer with mask 0xFF into Long Register
      // Masking with 0xFF keeps only the low byte, so a single zero-extending
      // byte load (movzbq) replaces the load, and, and convert.
      // No ins_cost is given for this rule.
 7205 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7206   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7207
 7208   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7209   ins_encode %{
 7210     __ movzbq($dst$$Register, $mem$$Address);
 7211   %}
 7212   ins_pipe(ialu_reg_mem);
 7213 %}
 7214 
 7215 // Load Integer with mask 0xFFFF into Long Register
      // Masking with 0xFFFF keeps only the low 16 bits, so a single
      // zero-extending 16-bit load (movzwq) replaces the load, and, and convert.
      // No ins_cost is given for this rule.
 7216 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7217   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7218
 7219   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7220   ins_encode %{
 7221     __ movzwq($dst$$Register, $mem$$Address);
 7222   %}
 7223   ins_pipe(ialu_reg_mem);
 7224 %}
 7225 
 7226 // Load Integer with a 31-bit mask into Long Register
      // Folds ConvI2L(AndI(LoadI, mask)) into a movl followed by an andl.
      // Presumably immU31 guarantees a non-negative mask, so the masked int is
      // non-negative and no sign extension is needed -- confirm against the
      // immU31 operand definition.
 7227 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7228   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7229   effect(KILL cr);  // the andl below modifies the flags
 7230
 7231   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7232             "andl    $dst, $mask" %}
 7233   ins_encode %{
 7234     Register Rdst = $dst$$Register;
 7235     __ movl(Rdst, $mem$$Address);
 7236     __ andl(Rdst, $mask$$constant);
 7237   %}
 7238   ins_pipe(ialu_reg_mem);
 7239 %}
 7240 
 7241 // Load Unsigned Integer into Long Register
 7242 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7243 %{
 7244   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7245 
 7246   ins_cost(125);
 7247   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7248 
 7249   ins_encode %{
 7250     __ movl($dst$$Register, $mem$$Address);
 7251   %}
 7252 
 7253   ins_pipe(ialu_reg_mem);
 7254 %}
 7255 
 7256 // Load Long
 7257 instruct loadL(rRegL dst, memory mem)
 7258 %{
 7259   match(Set dst (LoadL mem));
 7260 
 7261   ins_cost(125);
 7262   format %{ "movq    $dst, $mem\t# long" %}
 7263 
 7264   ins_encode %{
 7265     __ movq($dst$$Register, $mem$$Address);
 7266   %}
 7267 
 7268   ins_pipe(ialu_reg_mem); // XXX
 7269 %}
 7270 
 7271 // Load Range
 7272 instruct loadRange(rRegI dst, memory mem)
 7273 %{
 7274   match(Set dst (LoadRange mem));
 7275 
 7276   ins_cost(125); // XXX
 7277   format %{ "movl    $dst, $mem\t# range" %}
 7278   ins_encode %{
 7279     __ movl($dst$$Register, $mem$$Address);
 7280   %}
 7281   ins_pipe(ialu_reg_mem);
 7282 %}
 7283 
 7284 // Load Pointer
 7285 instruct loadP(rRegP dst, memory mem)
 7286 %{
 7287   match(Set dst (LoadP mem));
 7288   predicate(n->as_Load()->barrier_data() == 0);
 7289 
 7290   ins_cost(125); // XXX
 7291   format %{ "movq    $dst, $mem\t# ptr" %}
 7292   ins_encode %{
 7293     __ movq($dst$$Register, $mem$$Address);
 7294   %}
 7295   ins_pipe(ialu_reg_mem); // XXX
 7296 %}
 7297 
 7298 // Load Compressed Pointer
 7299 instruct loadN(rRegN dst, memory mem)
 7300 %{
 7301    predicate(n->as_Load()->barrier_data() == 0);
 7302    match(Set dst (LoadN mem));
 7303 
 7304    ins_cost(125); // XXX
 7305    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7306    ins_encode %{
 7307      __ movl($dst$$Register, $mem$$Address);
 7308    %}
 7309    ins_pipe(ialu_reg_mem); // XXX
 7310 %}
 7311 
 7312 
 7313 // Load Klass Pointer
 7314 instruct loadKlass(rRegP dst, memory mem)
 7315 %{
 7316   match(Set dst (LoadKlass mem));
 7317 
 7318   ins_cost(125); // XXX
 7319   format %{ "movq    $dst, $mem\t# class" %}
 7320   ins_encode %{
 7321     __ movq($dst$$Register, $mem$$Address);
 7322   %}
 7323   ins_pipe(ialu_reg_mem); // XXX
 7324 %}
 7325 
 7326 // Load narrow Klass Pointer
 7327 instruct loadNKlass(rRegN dst, memory mem)
 7328 %{
 7329   predicate(!UseCompactObjectHeaders);
 7330   match(Set dst (LoadNKlass mem));
 7331 
 7332   ins_cost(125); // XXX
 7333   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7334   ins_encode %{
 7335     __ movl($dst$$Register, $mem$$Address);
 7336   %}
 7337   ins_pipe(ialu_reg_mem); // XXX
 7338 %}
 7339 
 7340 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7341 %{
 7342   predicate(UseCompactObjectHeaders);
 7343   match(Set dst (LoadNKlass mem));
 7344   effect(KILL cr);
 7345   ins_cost(125);
 7346   format %{
 7347     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7348     "shrl    $dst, markWord::klass_shift_at_offset"
 7349   %}
 7350   ins_encode %{
 7351     if (UseAPX) {
 7352       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7353     }
 7354     else {
 7355       __ movl($dst$$Register, $mem$$Address);
 7356       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7357     }
 7358   %}
 7359   ins_pipe(ialu_reg_mem);
 7360 %}
 7361 
 7362 // Load Float
 7363 instruct loadF(regF dst, memory mem)
 7364 %{
 7365   match(Set dst (LoadF mem));
 7366 
 7367   ins_cost(145); // XXX
 7368   format %{ "movss   $dst, $mem\t# float" %}
 7369   ins_encode %{
 7370     __ movflt($dst$$XMMRegister, $mem$$Address);
 7371   %}
 7372   ins_pipe(pipe_slow); // XXX
 7373 %}
 7374 
 7375 // Load Double
 7376 instruct loadD_partial(regD dst, memory mem)
 7377 %{
 7378   predicate(!UseXmmLoadAndClearUpper);
 7379   match(Set dst (LoadD mem));
 7380 
 7381   ins_cost(145); // XXX
 7382   format %{ "movlpd  $dst, $mem\t# double" %}
 7383   ins_encode %{
 7384     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7385   %}
 7386   ins_pipe(pipe_slow); // XXX
 7387 %}
 7388 
 7389 instruct loadD(regD dst, memory mem)
 7390 %{
 7391   predicate(UseXmmLoadAndClearUpper);
 7392   match(Set dst (LoadD mem));
 7393 
 7394   ins_cost(145); // XXX
 7395   format %{ "movsd   $dst, $mem\t# double" %}
 7396   ins_encode %{
 7397     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7398   %}
 7399   ins_pipe(pipe_slow); // XXX
 7400 %}
 7401 
 7402 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7403 %{
 7404   match(Set dst con);
 7405 
 7406   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7407 
 7408   ins_encode %{
 7409     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7410   %}
 7411 
 7412   ins_pipe(ialu_reg_fat);
 7413 %}
 7414 
 7415 // min = java.lang.Math.min(float a, float b)
 7416 // max = java.lang.Math.max(float a, float b)
 7417 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
 7418 %{
 7419   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7420   match(Set dst (MaxF a b));
 7421   match(Set dst (MinF a b));
 7422 
 7423   format %{ "minmaxF $dst, $a, $b" %}
 7424   ins_encode %{
 7425     int opcode = this->ideal_Opcode();
 7426     __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7427   %}
 7428   ins_pipe( pipe_slow );
 7429 %}
 7430 
 7431 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
 7432 %{
 7433   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7434   match(Set dst (MaxF a b));
 7435   match(Set dst (MinF a b));
 7436   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7437 
 7438   format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7439   ins_encode %{
 7440     int opcode = this->ideal_Opcode();
 7441     bool min = (opcode == Op_MinF) ? true : false;
 7442     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7443                     min, fp_prec_flt /*pt*/);
 7444   %}
 7445   ins_pipe( pipe_slow );
 7446 %}
 7447 
 7448 // min = java.lang.Math.min(float a, float b)
 7449 // max = java.lang.Math.max(float a, float b)
 7450 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
 7451 %{
 7452   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7453   match(Set dst (MaxF a b));
 7454   match(Set dst (MinF a b));
 7455   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7456 
 7457   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7458   ins_encode %{
 7459     int opcode = this->ideal_Opcode();
 7460     int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
 7461     __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7462                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7463   %}
 7464   ins_pipe( pipe_slow );
 7465 %}
 7466 
 7467 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
 7468 %{
 7469   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7470   match(Set dst (MaxF a b));
 7471   match(Set dst (MinF a b));
 7472   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7473 
 7474   format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
 7475   ins_encode %{
 7476     int opcode = this->ideal_Opcode();
 7477     bool min = (opcode == Op_MinF) ? true : false;
 7478     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7479                     min, fp_prec_flt /*pt*/);
 7480   %}
 7481   ins_pipe( pipe_slow );
 7482 %}
 7483 
 7484 // min = java.lang.Math.min(double a, double b)
 7485 // max = java.lang.Math.max(double a, double b)
 7486 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
 7487 %{
 7488   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7489   match(Set dst (MaxD a b));
 7490   match(Set dst (MinD a b));
 7491 
 7492   format %{ "minmaxD $dst, $a, $b" %}
 7493   ins_encode %{
 7494     int opcode = this->ideal_Opcode();
 7495     __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7496   %}
 7497   ins_pipe( pipe_slow );
 7498 %}
 7499 
 7500 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
 7501 %{
 7502   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7503   match(Set dst (MaxD a b));
 7504   match(Set dst (MinD a b));
 7505   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7506 
 7507   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7508   ins_encode %{
 7509     int opcode = this->ideal_Opcode();
 7510     bool min = (opcode == Op_MinD) ? true : false;
 7511     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7512                     min, fp_prec_dbl /*pt*/);
 7513   %}
 7514   ins_pipe( pipe_slow );
 7515 %}
 7516 
 7517 // min = java.lang.Math.min(double a, double b)
 7518 // max = java.lang.Math.max(double a, double b)
 7519 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
 7520 %{
 7521   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7522   match(Set dst (MaxD a b));
 7523   match(Set dst (MinD a b));
 7524   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7525 
 7526   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7527   ins_encode %{
 7528     int opcode = this->ideal_Opcode();
 7529     int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
 7530     __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7531                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7532   %}
 7533   ins_pipe( pipe_slow );
 7534 %}
 7535 
 7536 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
 7537 %{
 7538   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7539   match(Set dst (MaxD a b));
 7540   match(Set dst (MinD a b));
 7541   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7542 
 7543   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7544   ins_encode %{
 7545     int opcode = this->ideal_Opcode();
 7546     bool min = (opcode == Op_MinD) ? true : false;
 7547     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7548                     min, fp_prec_dbl /*pt*/);
 7549   %}
 7550   ins_pipe( pipe_slow );
 7551 %}
 7552 
 7553 // Load Effective Address
 7554 instruct leaP8(rRegP dst, indOffset8 mem)
 7555 %{
 7556   match(Set dst mem);
 7557 
 7558   ins_cost(110); // XXX
 7559   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7560   ins_encode %{
 7561     __ leaq($dst$$Register, $mem$$Address);
 7562   %}
 7563   ins_pipe(ialu_reg_reg_fat);
 7564 %}
 7565 
 7566 instruct leaP32(rRegP dst, indOffset32 mem)
 7567 %{
 7568   match(Set dst mem);
 7569 
 7570   ins_cost(110);
 7571   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7572   ins_encode %{
 7573     __ leaq($dst$$Register, $mem$$Address);
 7574   %}
 7575   ins_pipe(ialu_reg_reg_fat);
 7576 %}
 7577 
 7578 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7579 %{
 7580   match(Set dst mem);
 7581 
 7582   ins_cost(110);
 7583   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7584   ins_encode %{
 7585     __ leaq($dst$$Register, $mem$$Address);
 7586   %}
 7587   ins_pipe(ialu_reg_reg_fat);
 7588 %}
 7589 
 7590 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7591 %{
 7592   match(Set dst mem);
 7593 
 7594   ins_cost(110);
 7595   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7596   ins_encode %{
 7597     __ leaq($dst$$Register, $mem$$Address);
 7598   %}
 7599   ins_pipe(ialu_reg_reg_fat);
 7600 %}
 7601 
 7602 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7603 %{
 7604   match(Set dst mem);
 7605 
 7606   ins_cost(110);
 7607   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7608   ins_encode %{
 7609     __ leaq($dst$$Register, $mem$$Address);
 7610   %}
 7611   ins_pipe(ialu_reg_reg_fat);
 7612 %}
 7613 
 7614 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7615 %{
 7616   match(Set dst mem);
 7617 
 7618   ins_cost(110);
 7619   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7620   ins_encode %{
 7621     __ leaq($dst$$Register, $mem$$Address);
 7622   %}
 7623   ins_pipe(ialu_reg_reg_fat);
 7624 %}
 7625 
 7626 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7627 %{
 7628   match(Set dst mem);
 7629 
 7630   ins_cost(110);
 7631   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7632   ins_encode %{
 7633     __ leaq($dst$$Register, $mem$$Address);
 7634   %}
 7635   ins_pipe(ialu_reg_reg_fat);
 7636 %}
 7637 
 7638 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7639 %{
 7640   match(Set dst mem);
 7641 
 7642   ins_cost(110);
 7643   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7644   ins_encode %{
 7645     __ leaq($dst$$Register, $mem$$Address);
 7646   %}
 7647   ins_pipe(ialu_reg_reg_fat);
 7648 %}
 7649 
 7650 // Load Effective Address which uses Narrow (32-bits) oop
 7651 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7652 %{
 7653   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7654   match(Set dst mem);
 7655 
 7656   ins_cost(110);
 7657   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7658   ins_encode %{
 7659     __ leaq($dst$$Register, $mem$$Address);
 7660   %}
 7661   ins_pipe(ialu_reg_reg_fat);
 7662 %}
 7663 
 7664 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7665 %{
 7666   predicate(CompressedOops::shift() == 0);
 7667   match(Set dst mem);
 7668 
 7669   ins_cost(110); // XXX
 7670   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7671   ins_encode %{
 7672     __ leaq($dst$$Register, $mem$$Address);
 7673   %}
 7674   ins_pipe(ialu_reg_reg_fat);
 7675 %}
 7676 
 7677 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7678 %{
 7679   predicate(CompressedOops::shift() == 0);
 7680   match(Set dst mem);
 7681 
 7682   ins_cost(110);
 7683   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7684   ins_encode %{
 7685     __ leaq($dst$$Register, $mem$$Address);
 7686   %}
 7687   ins_pipe(ialu_reg_reg_fat);
 7688 %}
 7689 
 7690 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7691 %{
 7692   predicate(CompressedOops::shift() == 0);
 7693   match(Set dst mem);
 7694 
 7695   ins_cost(110);
 7696   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7697   ins_encode %{
 7698     __ leaq($dst$$Register, $mem$$Address);
 7699   %}
 7700   ins_pipe(ialu_reg_reg_fat);
 7701 %}
 7702 
 7703 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7704 %{
 7705   predicate(CompressedOops::shift() == 0);
 7706   match(Set dst mem);
 7707 
 7708   ins_cost(110);
 7709   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7710   ins_encode %{
 7711     __ leaq($dst$$Register, $mem$$Address);
 7712   %}
 7713   ins_pipe(ialu_reg_reg_fat);
 7714 %}
 7715 
 7716 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7717 %{
 7718   predicate(CompressedOops::shift() == 0);
 7719   match(Set dst mem);
 7720 
 7721   ins_cost(110);
 7722   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7723   ins_encode %{
 7724     __ leaq($dst$$Register, $mem$$Address);
 7725   %}
 7726   ins_pipe(ialu_reg_reg_fat);
 7727 %}
 7728 
 7729 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7730 %{
 7731   predicate(CompressedOops::shift() == 0);
 7732   match(Set dst mem);
 7733 
 7734   ins_cost(110);
 7735   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7736   ins_encode %{
 7737     __ leaq($dst$$Register, $mem$$Address);
 7738   %}
 7739   ins_pipe(ialu_reg_reg_fat);
 7740 %}
 7741 
 7742 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7743 %{
 7744   predicate(CompressedOops::shift() == 0);
 7745   match(Set dst mem);
 7746 
 7747   ins_cost(110);
 7748   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7749   ins_encode %{
 7750     __ leaq($dst$$Register, $mem$$Address);
 7751   %}
 7752   ins_pipe(ialu_reg_reg_fat);
 7753 %}
 7754 
 7755 instruct loadConI(rRegI dst, immI src)
 7756 %{
 7757   match(Set dst src);
 7758 
 7759   format %{ "movl    $dst, $src\t# int" %}
 7760   ins_encode %{
 7761     __ movl($dst$$Register, $src$$constant);
 7762   %}
 7763   ins_pipe(ialu_reg_fat); // XXX
 7764 %}
 7765 
 7766 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7767 %{
 7768   match(Set dst src);
 7769   effect(KILL cr);
 7770 
 7771   ins_cost(50);
 7772   format %{ "xorl    $dst, $dst\t# int" %}
 7773   ins_encode %{
 7774     __ xorl($dst$$Register, $dst$$Register);
 7775   %}
 7776   ins_pipe(ialu_reg);
 7777 %}
 7778 
 7779 instruct loadConL(rRegL dst, immL src)
 7780 %{
 7781   match(Set dst src);
 7782 
 7783   ins_cost(150);
 7784   format %{ "movq    $dst, $src\t# long" %}
 7785   ins_encode %{
 7786     __ mov64($dst$$Register, $src$$constant);
 7787   %}
 7788   ins_pipe(ialu_reg);
 7789 %}
 7790 
 7791 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7792 %{
 7793   match(Set dst src);
 7794   effect(KILL cr);
 7795 
 7796   ins_cost(50);
 7797   format %{ "xorl    $dst, $dst\t# long" %}
 7798   ins_encode %{
 7799     __ xorl($dst$$Register, $dst$$Register);
 7800   %}
 7801   ins_pipe(ialu_reg); // XXX
 7802 %}
 7803 
 7804 instruct loadConUL32(rRegL dst, immUL32 src)
 7805 %{
 7806   match(Set dst src);
 7807 
 7808   ins_cost(60);
 7809   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7810   ins_encode %{
 7811     __ movl($dst$$Register, $src$$constant);
 7812   %}
 7813   ins_pipe(ialu_reg);
 7814 %}
 7815 
 7816 instruct loadConL32(rRegL dst, immL32 src)
 7817 %{
 7818   match(Set dst src);
 7819 
 7820   ins_cost(70);
 7821   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7822   ins_encode %{
 7823     __ movq($dst$$Register, $src$$constant);
 7824   %}
 7825   ins_pipe(ialu_reg);
 7826 %}
 7827 
 7828 instruct loadConP(rRegP dst, immP con) %{
 7829   match(Set dst con);
 7830 
 7831   format %{ "movq    $dst, $con\t# ptr" %}
 7832   ins_encode %{
 7833     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7834   %}
 7835   ins_pipe(ialu_reg_fat); // XXX
 7836 %}
 7837 
 7838 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7839 %{
 7840   match(Set dst src);
 7841   effect(KILL cr);
 7842 
 7843   ins_cost(50);
 7844   format %{ "xorl    $dst, $dst\t# ptr" %}
 7845   ins_encode %{
 7846     __ xorl($dst$$Register, $dst$$Register);
 7847   %}
 7848   ins_pipe(ialu_reg);
 7849 %}
 7850 
 7851 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7852 %{
 7853   match(Set dst src);
 7854   effect(KILL cr);
 7855 
 7856   ins_cost(60);
 7857   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7858   ins_encode %{
 7859     __ movl($dst$$Register, $src$$constant);
 7860   %}
 7861   ins_pipe(ialu_reg);
 7862 %}
 7863 
 7864 instruct loadConF(regF dst, immF con) %{
 7865   match(Set dst con);
 7866   ins_cost(125);
 7867   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7868   ins_encode %{
 7869     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7870   %}
 7871   ins_pipe(pipe_slow);
 7872 %}
 7873 
 7874 instruct loadConH(regF dst, immH con) %{
 7875   match(Set dst con);
 7876   ins_cost(125);
 7877   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7878   ins_encode %{
 7879     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7880   %}
 7881   ins_pipe(pipe_slow);
 7882 %}
 7883 
 7884 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7885   match(Set dst src);
 7886   effect(KILL cr);
 7887   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7888   ins_encode %{
 7889     __ xorq($dst$$Register, $dst$$Register);
 7890   %}
 7891   ins_pipe(ialu_reg);
 7892 %}
 7893 
 7894 instruct loadConN(rRegN dst, immN src) %{
 7895   match(Set dst src);
 7896 
 7897   ins_cost(125);
 7898   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7899   ins_encode %{
 7900     address con = (address)$src$$constant;
 7901     if (con == nullptr) {
 7902       ShouldNotReachHere();
 7903     } else {
 7904       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7905     }
 7906   %}
 7907   ins_pipe(ialu_reg_fat); // XXX
 7908 %}
 7909 
 7910 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7911   match(Set dst src);
 7912 
 7913   ins_cost(125);
 7914   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7915   ins_encode %{
 7916     address con = (address)$src$$constant;
 7917     if (con == nullptr) {
 7918       ShouldNotReachHere();
 7919     } else {
 7920       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7921     }
 7922   %}
 7923   ins_pipe(ialu_reg_fat); // XXX
 7924 %}
 7925 
 7926 instruct loadConF0(regF dst, immF0 src)
 7927 %{
 7928   match(Set dst src);
 7929   ins_cost(100);
 7930 
 7931   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7932   ins_encode %{
 7933     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7934   %}
 7935   ins_pipe(pipe_slow);
 7936 %}
 7937 
 7938 // Use the same format since predicate() can not be used here.
 7939 instruct loadConD(regD dst, immD con) %{
 7940   match(Set dst con);
 7941   ins_cost(125);
 7942   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7943   ins_encode %{
 7944     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7945   %}
 7946   ins_pipe(pipe_slow);
 7947 %}
 7948 
 7949 instruct loadConD0(regD dst, immD0 src)
 7950 %{
 7951   match(Set dst src);
 7952   ins_cost(100);
 7953 
 7954   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7955   ins_encode %{
 7956     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7957   %}
 7958   ins_pipe(pipe_slow);
 7959 %}
 7960 
 7961 instruct loadSSI(rRegI dst, stackSlotI src)
 7962 %{
 7963   match(Set dst src);
 7964 
 7965   ins_cost(125);
 7966   format %{ "movl    $dst, $src\t# int stk" %}
 7967   ins_encode %{
 7968     __ movl($dst$$Register, $src$$Address);
 7969   %}
 7970   ins_pipe(ialu_reg_mem);
 7971 %}
 7972 
 7973 instruct loadSSL(rRegL dst, stackSlotL src)
 7974 %{
 7975   match(Set dst src);
 7976 
 7977   ins_cost(125);
 7978   format %{ "movq    $dst, $src\t# long stk" %}
 7979   ins_encode %{
 7980     __ movq($dst$$Register, $src$$Address);
 7981   %}
 7982   ins_pipe(ialu_reg_mem);
 7983 %}
 7984 
 7985 instruct loadSSP(rRegP dst, stackSlotP src)
 7986 %{
 7987   match(Set dst src);
 7988 
 7989   ins_cost(125);
 7990   format %{ "movq    $dst, $src\t# ptr stk" %}
 7991   ins_encode %{
 7992     __ movq($dst$$Register, $src$$Address);
 7993   %}
 7994   ins_pipe(ialu_reg_mem);
 7995 %}
 7996 
 7997 instruct loadSSF(regF dst, stackSlotF src)
 7998 %{
 7999   match(Set dst src);
 8000 
 8001   ins_cost(125);
 8002   format %{ "movss   $dst, $src\t# float stk" %}
 8003   ins_encode %{
 8004     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 8005   %}
 8006   ins_pipe(pipe_slow); // XXX
 8007 %}
 8008 
 8009 // Use the same format since predicate() can not be used here.
 8010 instruct loadSSD(regD dst, stackSlotD src)
 8011 %{
 8012   match(Set dst src);
 8013 
 8014   ins_cost(125);
 8015   format %{ "movsd   $dst, $src\t# double stk" %}
 8016   ins_encode  %{
 8017     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 8018   %}
 8019   ins_pipe(pipe_slow); // XXX
 8020 %}
 8021 
 8022 // Prefetch instructions for allocation.
 8023 // Must be safe to execute with invalid address (cannot fault).
 8024 
 8025 instruct prefetchAlloc( memory mem ) %{
 8026   predicate(AllocatePrefetchInstr==3);
 8027   match(PrefetchAllocation mem);
 8028   ins_cost(125);
 8029 
 8030   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 8031   ins_encode %{
 8032     __ prefetchw($mem$$Address);
 8033   %}
 8034   ins_pipe(ialu_mem);
 8035 %}
 8036 
 8037 instruct prefetchAllocNTA( memory mem ) %{
 8038   predicate(AllocatePrefetchInstr==0);
 8039   match(PrefetchAllocation mem);
 8040   ins_cost(125);
 8041 
 8042   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 8043   ins_encode %{
 8044     __ prefetchnta($mem$$Address);
 8045   %}
 8046   ins_pipe(ialu_mem);
 8047 %}
 8048 
 8049 instruct prefetchAllocT0( memory mem ) %{
 8050   predicate(AllocatePrefetchInstr==1);
 8051   match(PrefetchAllocation mem);
 8052   ins_cost(125);
 8053 
 8054   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8055   ins_encode %{
 8056     __ prefetcht0($mem$$Address);
 8057   %}
 8058   ins_pipe(ialu_mem);
 8059 %}
 8060 
 8061 instruct prefetchAllocT2( memory mem ) %{
 8062   predicate(AllocatePrefetchInstr==2);
 8063   match(PrefetchAllocation mem);
 8064   ins_cost(125);
 8065 
 8066   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8067   ins_encode %{
 8068     __ prefetcht2($mem$$Address);
 8069   %}
 8070   ins_pipe(ialu_mem);
 8071 %}
 8072 
 8073 //----------Store Instructions-------------------------------------------------
 8074 
 8075 // Store Byte
 8076 instruct storeB(memory mem, rRegI src)
 8077 %{
 8078   match(Set mem (StoreB mem src));
 8079 
 8080   ins_cost(125); // XXX
 8081   format %{ "movb    $mem, $src\t# byte" %}
 8082   ins_encode %{
 8083     __ movb($mem$$Address, $src$$Register);
 8084   %}
 8085   ins_pipe(ialu_mem_reg);
 8086 %}
 8087 
 8088 // Store Char/Short
 8089 instruct storeC(memory mem, rRegI src)
 8090 %{
 8091   match(Set mem (StoreC mem src));
 8092 
 8093   ins_cost(125); // XXX
 8094   format %{ "movw    $mem, $src\t# char/short" %}
 8095   ins_encode %{
 8096     __ movw($mem$$Address, $src$$Register);
 8097   %}
 8098   ins_pipe(ialu_mem_reg);
 8099 %}
 8100 
 8101 // Store Integer
 8102 instruct storeI(memory mem, rRegI src)
 8103 %{
 8104   match(Set mem (StoreI mem src));
 8105 
 8106   ins_cost(125); // XXX
 8107   format %{ "movl    $mem, $src\t# int" %}
 8108   ins_encode %{
 8109     __ movl($mem$$Address, $src$$Register);
 8110   %}
 8111   ins_pipe(ialu_mem_reg);
 8112 %}
 8113 
 8114 // Store Long
 8115 instruct storeL(memory mem, rRegL src)
 8116 %{
 8117   match(Set mem (StoreL mem src));
 8118 
 8119   ins_cost(125); // XXX
 8120   format %{ "movq    $mem, $src\t# long" %}
 8121   ins_encode %{
 8122     __ movq($mem$$Address, $src$$Register);
 8123   %}
 8124   ins_pipe(ialu_mem_reg); // XXX
 8125 %}
 8126 
 8127 // Store Pointer
 8128 instruct storeP(memory mem, any_RegP src)
 8129 %{
 8130   predicate(n->as_Store()->barrier_data() == 0);
 8131   match(Set mem (StoreP mem src));
 8132 
 8133   ins_cost(125); // XXX
 8134   format %{ "movq    $mem, $src\t# ptr" %}
 8135   ins_encode %{
 8136     __ movq($mem$$Address, $src$$Register);
 8137   %}
 8138   ins_pipe(ialu_mem_reg);
 8139 %}
 8140 
 8141 instruct storeImmP0(memory mem, immP0 zero)
 8142 %{
 8143   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8144   match(Set mem (StoreP mem zero));
 8145 
 8146   ins_cost(125); // XXX
 8147   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8148   ins_encode %{
 8149     __ movq($mem$$Address, r12);
 8150   %}
 8151   ins_pipe(ialu_mem_reg);
 8152 %}
 8153 
 8154 // Store Null Pointer, mark word, or other simple pointer constant.
 8155 instruct storeImmP(memory mem, immP31 src)
 8156 %{
 8157   predicate(n->as_Store()->barrier_data() == 0);
 8158   match(Set mem (StoreP mem src));
 8159 
 8160   ins_cost(150); // XXX
 8161   format %{ "movq    $mem, $src\t# ptr" %}
 8162   ins_encode %{
 8163     __ movq($mem$$Address, $src$$constant);
 8164   %}
 8165   ins_pipe(ialu_mem_imm);
 8166 %}
 8167 
 8168 // Store Compressed Pointer
 8169 instruct storeN(memory mem, rRegN src)
 8170 %{
 8171   predicate(n->as_Store()->barrier_data() == 0);
 8172   match(Set mem (StoreN mem src));
 8173 
 8174   ins_cost(125); // XXX
 8175   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8176   ins_encode %{
 8177     __ movl($mem$$Address, $src$$Register);
 8178   %}
 8179   ins_pipe(ialu_mem_reg);
 8180 %}
 8181 
 8182 instruct storeNKlass(memory mem, rRegN src)
 8183 %{
 8184   match(Set mem (StoreNKlass mem src));
 8185 
 8186   ins_cost(125); // XXX
 8187   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8188   ins_encode %{
 8189     __ movl($mem$$Address, $src$$Register);
 8190   %}
 8191   ins_pipe(ialu_mem_reg);
 8192 %}
 8193 
 8194 instruct storeImmN0(memory mem, immN0 zero)
 8195 %{
 8196   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8197   match(Set mem (StoreN mem zero));
 8198 
 8199   ins_cost(125); // XXX
 8200   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8201   ins_encode %{
 8202     __ movl($mem$$Address, r12);
 8203   %}
 8204   ins_pipe(ialu_mem_reg);
 8205 %}
 8206 
 8207 instruct storeImmN(memory mem, immN src)
 8208 %{
 8209   predicate(n->as_Store()->barrier_data() == 0);
 8210   match(Set mem (StoreN mem src));
 8211 
 8212   ins_cost(150); // XXX
 8213   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8214   ins_encode %{
 8215     address con = (address)$src$$constant;
 8216     if (con == nullptr) {
 8217       __ movl($mem$$Address, 0);
 8218     } else {
 8219       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8220     }
 8221   %}
 8222   ins_pipe(ialu_mem_imm);
 8223 %}
 8224 
 8225 instruct storeImmNKlass(memory mem, immNKlass src)
 8226 %{
 8227   match(Set mem (StoreNKlass mem src));
 8228 
 8229   ins_cost(150); // XXX
 8230   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8231   ins_encode %{
 8232     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8233   %}
 8234   ins_pipe(ialu_mem_imm);
 8235 %}
 8236 
 8237 // Store Integer Immediate
 8238 instruct storeImmI0(memory mem, immI_0 zero)
 8239 %{
 8240   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8241   match(Set mem (StoreI mem zero));
 8242 
 8243   ins_cost(125); // XXX
 8244   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8245   ins_encode %{
 8246     __ movl($mem$$Address, r12);
 8247   %}
 8248   ins_pipe(ialu_mem_reg);
 8249 %}
 8250 
 8251 instruct storeImmI(memory mem, immI src)
 8252 %{
 8253   match(Set mem (StoreI mem src));
 8254 
 8255   ins_cost(150);
 8256   format %{ "movl    $mem, $src\t# int" %}
 8257   ins_encode %{
 8258     __ movl($mem$$Address, $src$$constant);
 8259   %}
 8260   ins_pipe(ialu_mem_imm);
 8261 %}
 8262 
 8263 // Store Long Immediate
 8264 instruct storeImmL0(memory mem, immL0 zero)
 8265 %{
        // Long-zero store that writes r12 (64-bit movq) instead of an immediate.
        // Selected only when compressed oops are enabled and
        // CompressedOops::base() is null; the format string notes
        // R12_heapbase == 0 in that configuration.
 8266   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8267   match(Set mem (StoreL mem zero));
 8268 
 8269   ins_cost(125); // XXX
 8270   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8271   ins_encode %{
 8272     __ movq($mem$$Address, r12);
 8273   %}
 8274   ins_pipe(ialu_mem_reg);
 8275 %}
 8276 
 8277 instruct storeImmL(memory mem, immL32 src)
 8278 %{
        // Store a long constant accepted by the immL32 operand to memory with a
        // movq of an immediate.
 8279   match(Set mem (StoreL mem src));
 8280 
 8281   ins_cost(150);
 8282   format %{ "movq    $mem, $src\t# long" %}
 8283   ins_encode %{
 8284     __ movq($mem$$Address, $src$$constant);
 8285   %}
 8286   ins_pipe(ialu_mem_imm);
 8287 %}
 8288 
 8289 // Store Short/Char Immediate
 8290 instruct storeImmC0(memory mem, immI_0 zero)
 8291 %{
        // 16-bit zero store that writes r12 (movw) instead of an immediate.
        // Selected only when compressed oops are enabled and
        // CompressedOops::base() is null; the format string notes
        // R12_heapbase == 0 in that configuration.
 8292   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8293   match(Set mem (StoreC mem zero));
 8294 
 8295   ins_cost(125); // XXX
 8296   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8297   ins_encode %{
 8298     __ movw($mem$$Address, r12);
 8299   %}
 8300   ins_pipe(ialu_mem_reg);
 8301 %}
 8302 
 8303 instruct storeImmI16(memory mem, immI16 src)
 8304 %{
        // Store a 16-bit immediate (immI16) for a StoreC node using movw.
        // Only available when the UseStoreImmI16 flag is set.
 8305   predicate(UseStoreImmI16);
 8306   match(Set mem (StoreC mem src));
 8307 
 8308   ins_cost(150);
 8309   format %{ "movw    $mem, $src\t# short/char" %}
 8310   ins_encode %{
 8311     __ movw($mem$$Address, $src$$constant);
 8312   %}
 8313   ins_pipe(ialu_mem_imm);
 8314 %}
 8315 
 8316 // Store Byte Immediate
 8317 instruct storeImmB0(memory mem, immI_0 zero)
 8318 %{
        // Byte-zero store that writes r12 (movb) instead of an immediate.
        // Selected only when compressed oops are enabled and
        // CompressedOops::base() is null; the format string notes
        // R12_heapbase == 0 in that configuration.
 8319   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8320   match(Set mem (StoreB mem zero));
 8321 
 8322   ins_cost(125); // XXX
 8323   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8324   ins_encode %{
 8325     __ movb($mem$$Address, r12);
 8326   %}
 8327   ins_pipe(ialu_mem_reg);
 8328 %}
 8329 
 8330 instruct storeImmB(memory mem, immI8 src)
 8331 %{
        // Store an 8-bit immediate (immI8) to memory with movb.
 8332   match(Set mem (StoreB mem src));
 8333 
 8334   ins_cost(150); // XXX
 8335   format %{ "movb    $mem, $src\t# byte" %}
 8336   ins_encode %{
 8337     __ movb($mem$$Address, $src$$constant);
 8338   %}
 8339   ins_pipe(ialu_mem_imm);
 8340 %}
 8341 
 8342 // Store Float
 8343 instruct storeF(memory mem, regF src)
 8344 %{
        // Store a float held in an XMM register to memory via movflt.
 8345   match(Set mem (StoreF mem src));
 8346 
 8347   ins_cost(95); // XXX
 8348   format %{ "movss   $mem, $src\t# float" %}
 8349   ins_encode %{
 8350     __ movflt($mem$$Address, $src$$XMMRegister);
 8351   %}
 8352   ins_pipe(pipe_slow); // XXX
 8353 %}
 8354 
 8355 // Store immediate Float value (it is faster than store from XMM register)
 8356 instruct storeF0(memory mem, immF0 zero)
 8357 %{
        // Float-zero (immF0) store done as a 32-bit integer store of r12.
        // Selected only when compressed oops are enabled and
        // CompressedOops::base() is null; the format string notes
        // R12_heapbase == 0 in that configuration.
 8358   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8359   match(Set mem (StoreF mem zero));
 8360 
 8361   ins_cost(25); // XXX
 8362   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8363   ins_encode %{
 8364     __ movl($mem$$Address, r12);
 8365   %}
 8366   ins_pipe(ialu_mem_reg);
 8367 %}
 8368 
 8369 instruct storeF_imm(memory mem, immF src)
 8370 %{
        // Store a float constant as a 32-bit integer immediate; no XMM register
        // is involved.
 8371   match(Set mem (StoreF mem src));
 8372 
 8373   ins_cost(50);
 8374   format %{ "movl    $mem, $src\t# float" %}
 8375   ins_encode %{
          // jint_cast converts the float constant to the jint handed to movl
          // (presumably its raw bit pattern -- confirm against jint_cast).
 8376     __ movl($mem$$Address, jint_cast($src$$constant));
 8377   %}
 8378   ins_pipe(ialu_mem_imm);
 8379 %}
 8380 
 8381 // Store Double
 8382 instruct storeD(memory mem, regD src)
 8383 %{
        // Store a double held in an XMM register to memory via movdbl.
 8384   match(Set mem (StoreD mem src));
 8385 
 8386   ins_cost(95); // XXX
 8387   format %{ "movsd   $mem, $src\t# double" %}
 8388   ins_encode %{
 8389     __ movdbl($mem$$Address, $src$$XMMRegister);
 8390   %}
 8391   ins_pipe(pipe_slow); // XXX
 8392 %}
 8393 
 8394 // Store immediate double 0.0 (it is faster than store from XMM register)
 8395 instruct storeD0_imm(memory mem, immD0 src)
 8396 %{
        // Double-zero (immD0) store done as a 64-bit movq of an immediate.
        // The predicate is the negation of
        // "UseCompressedOops && CompressedOops::base() == nullptr", i.e. this
        // form applies when compressed oops are off or the base is non-null.
 8397   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8398   match(Set mem (StoreD mem src));
 8399 
 8400   ins_cost(50);
 8401   format %{ "movq    $mem, $src\t# double 0." %}
 8402   ins_encode %{
 8403     __ movq($mem$$Address, $src$$constant);
 8404   %}
 8405   ins_pipe(ialu_mem_imm);
 8406 %}
 8407 
 8408 instruct storeD0(memory mem, immD0 zero)
 8409 %{
        // Double-zero (immD0) store done as a 64-bit integer store of r12.
        // Selected only when compressed oops are enabled and
        // CompressedOops::base() is null; the format string notes
        // R12_heapbase == 0 in that configuration.
 8410   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8411   match(Set mem (StoreD mem zero));
 8412 
 8413   ins_cost(25); // XXX
 8414   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8415   ins_encode %{
 8416     __ movq($mem$$Address, r12);
 8417   %}
 8418   ins_pipe(ialu_mem_reg);
 8419 %}
 8420 
 8421 instruct storeSSI(stackSlotI dst, rRegI src)
 8422 %{
        // Move an int register into an int stack slot (32-bit movl).
 8423   match(Set dst src);
 8424 
 8425   ins_cost(100);
 8426   format %{ "movl    $dst, $src\t# int stk" %}
 8427   ins_encode %{
 8428     __ movl($dst$$Address, $src$$Register);
 8429   %}
 8430   ins_pipe( ialu_mem_reg );
 8431 %}
 8432 
 8433 instruct storeSSL(stackSlotL dst, rRegL src)
 8434 %{
        // Move a long register into a long stack slot (64-bit movq).
 8435   match(Set dst src);
 8436 
 8437   ins_cost(100);
 8438   format %{ "movq    $dst, $src\t# long stk" %}
 8439   ins_encode %{
 8440     __ movq($dst$$Address, $src$$Register);
 8441   %}
 8442   ins_pipe(ialu_mem_reg);
 8443 %}
 8444 
 8445 instruct storeSSP(stackSlotP dst, rRegP src)
 8446 %{
        // Move a pointer register into a pointer stack slot (64-bit movq).
 8447   match(Set dst src);
 8448 
 8449   ins_cost(100);
 8450   format %{ "movq    $dst, $src\t# ptr stk" %}
 8451   ins_encode %{
 8452     __ movq($dst$$Address, $src$$Register);
 8453   %}
 8454   ins_pipe(ialu_mem_reg);
 8455 %}
 8456 
 8457 instruct storeSSF(stackSlotF dst, regF src)
 8458 %{
        // Move a float XMM register into a float stack slot via movflt.
 8459   match(Set dst src);
 8460 
 8461   ins_cost(95); // XXX
 8462   format %{ "movss   $dst, $src\t# float stk" %}
 8463   ins_encode %{
          // The slot address is built explicitly as rsp + the operand's
          // displacement rather than through $dst$$Address.
 8464     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8465   %}
 8466   ins_pipe(pipe_slow); // XXX
 8467 %}
 8468 
 8469 instruct storeSSD(stackSlotD dst, regD src)
 8470 %{
        // Move a double XMM register into a double stack slot via movdbl.
 8471   match(Set dst src);
 8472 
 8473   ins_cost(95); // XXX
 8474   format %{ "movsd   $dst, $src\t# double stk" %}
 8475   ins_encode %{
          // The slot address is built explicitly as rsp + the operand's
          // displacement rather than through $dst$$Address.
 8476     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8477   %}
 8478   ins_pipe(pipe_slow); // XXX
 8479 %}
 8480 
 8481 instruct cacheWB(indirect addr)
 8482 %{
        // Cache write-back of the line addressed by $addr. Only available when
        // VM_Version::supports_data_cache_line_flush() is true.
 8483   predicate(VM_Version::supports_data_cache_line_flush());
 8484   match(CacheWB addr);
 8485 
 8486   ins_cost(100);
 8487   format %{"cache wb $addr" %}
 8488   ins_encode %{
          // The operand must be a bare base register: index_position() is
          // negative (presumably meaning "no index") and the displacement is 0.
 8489     assert($addr->index_position() < 0, "should be");
 8490     assert($addr$$disp == 0, "should be");
 8491     __ cache_wb(Address($addr$$base$$Register, 0));
 8492   %}
 8493   ins_pipe(pipe_slow); // XXX
 8494 %}
 8495 
 8496 instruct cacheWBPreSync()
 8497 %{
        // Synchronization step for the CacheWBPreSync node; calls
        // cache_wbsync(true). Only available when
        // VM_Version::supports_data_cache_line_flush() is true.
 8498   predicate(VM_Version::supports_data_cache_line_flush());
 8499   match(CacheWBPreSync);
 8500 
 8501   ins_cost(100);
 8502   format %{"cache wb presync" %}
 8503   ins_encode %{
 8504     __ cache_wbsync(true);
 8505   %}
 8506   ins_pipe(pipe_slow); // XXX
 8507 %}
 8508 
 8509 instruct cacheWBPostSync()
 8510 %{
        // Synchronization step for the CacheWBPostSync node; calls
        // cache_wbsync(false). Only available when
        // VM_Version::supports_data_cache_line_flush() is true.
 8511   predicate(VM_Version::supports_data_cache_line_flush());
 8512   match(CacheWBPostSync);
 8513 
 8514   ins_cost(100);
 8515   format %{"cache wb postsync" %}
 8516   ins_encode %{
 8517     __ cache_wbsync(false);
 8518   %}
 8519   ins_pipe(pipe_slow); // XXX
 8520 %}
 8521 
 8522 //----------BSWAP Instructions-------------------------------------------------
 8523 instruct bytes_reverse_int(rRegI dst) %{
        // Byte-swap an int in place: dst is both the input and the result of
        // ReverseBytesI, implemented with a single bswapl.
 8524   match(Set dst (ReverseBytesI dst));
 8525 
 8526   format %{ "bswapl  $dst" %}
 8527   ins_encode %{
 8528     __ bswapl($dst$$Register);
 8529   %}
 8530   ins_pipe( ialu_reg );
 8531 %}
 8532 
 8533 instruct bytes_reverse_long(rRegL dst) %{
        // Byte-swap a long in place: dst is both the input and the result of
        // ReverseBytesL, implemented with a single bswapq.
 8534   match(Set dst (ReverseBytesL dst));
 8535 
 8536   format %{ "bswapq  $dst" %}
 8537   ins_encode %{
 8538     __ bswapq($dst$$Register);
 8539   %}
 8540   ins_pipe( ialu_reg);
 8541 %}
 8542 
 8543 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
        // In-place ReverseBytesUS: swap all four bytes of the 32-bit register,
        // then shift right logically by 16 so the swapped low half ends up
        // zero-extended in the low 16 bits. Declares the flags register killed.
 8544   match(Set dst (ReverseBytesUS dst));
 8545   effect(KILL cr);
 8546 
 8547   format %{ "bswapl  $dst\n\t"
 8548             "shrl    $dst,16\n\t" %}
 8549   ins_encode %{
 8550     __ bswapl($dst$$Register);
 8551     __ shrl($dst$$Register, 16);
 8552   %}
 8553   ins_pipe( ialu_reg );
 8554 %}
 8555 
 8556 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
        // In-place ReverseBytesS: swap all four bytes of the 32-bit register,
        // then shift right arithmetically by 16 so the swapped low half ends up
        // sign-extended in the low 16 bits. Declares the flags register killed.
 8557   match(Set dst (ReverseBytesS dst));
 8558   effect(KILL cr);
 8559 
 8560   format %{ "bswapl  $dst\n\t"
 8561             "sarl    $dst,16\n\t" %}
 8562   ins_encode %{
 8563     __ bswapl($dst$$Register);
 8564     __ sarl($dst$$Register, 16);
 8565   %}
 8566   ins_pipe( ialu_reg );
 8567 %}
 8568 
 8569 //---------- Zeros Count Instructions ------------------------------------------
 8570 
 8571 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // CountLeadingZerosI on a register using lzcntl; used when
        // UseCountLeadingZerosInstruction is set. Declares the flags killed.
 8572   predicate(UseCountLeadingZerosInstruction);
 8573   match(Set dst (CountLeadingZerosI src));
 8574   effect(KILL cr);
 8575 
 8576   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8577   ins_encode %{
 8578     __ lzcntl($dst$$Register, $src$$Register);
 8579   %}
 8580   ins_pipe(ialu_reg);
 8581 %}
 8582 
 8583 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // CountLeadingZerosI with the LoadI folded into lzcntl's memory operand;
        // used when UseCountLeadingZerosInstruction is set.
 8584   predicate(UseCountLeadingZerosInstruction);
 8585   match(Set dst (CountLeadingZerosI (LoadI src)));
 8586   effect(KILL cr);
 8587   ins_cost(175);
 8588   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8589   ins_encode %{
 8590     __ lzcntl($dst$$Register, $src$$Address);
 8591   %}
 8592   ins_pipe(ialu_reg_mem);
 8593 %}
 8594 
 8595 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
        // CountLeadingZerosI built from bsrl, used when
        // UseCountLeadingZerosInstruction is not set. Declares the flags killed.
 8596   predicate(!UseCountLeadingZerosInstruction);
 8597   match(Set dst (CountLeadingZerosI src));
 8598   effect(KILL cr);
 8599 
 8600   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8601             "jnz     skip\n\t"
 8602             "movl    $dst, -1\n"
 8603       "skip:\n\t"
 8604             "negl    $dst\n\t"
 8605             "addl    $dst, 31" %}
 8606   ins_encode %{
 8607     Register Rdst = $dst$$Register;
 8608     Register Rsrc = $src$$Register;
 8609     Label skip;
          // bsrl leaves the index of the highest set bit in Rdst; when the
          // notZero branch is not taken (zero source), Rdst is forced to -1.
 8610     __ bsrl(Rdst, Rsrc);
 8611     __ jccb(Assembler::notZero, skip);
 8612     __ movl(Rdst, -1);
 8613     __ bind(skip);
          // Result = (BitsPerInt - 1) - Rdst; the -1 case therefore yields
          // BitsPerInt.
 8614     __ negl(Rdst);
 8615     __ addl(Rdst, BitsPerInt - 1);
 8616   %}
 8617   ins_pipe(ialu_reg);
 8618 %}
 8619 
 8620 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // CountLeadingZerosL on a long register using lzcntq, producing an int
        // register result; used when UseCountLeadingZerosInstruction is set.
 8621   predicate(UseCountLeadingZerosInstruction);
 8622   match(Set dst (CountLeadingZerosL src));
 8623   effect(KILL cr);
 8624 
 8625   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8626   ins_encode %{
 8627     __ lzcntq($dst$$Register, $src$$Register);
 8628   %}
 8629   ins_pipe(ialu_reg);
 8630 %}
 8631 
 8632 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // CountLeadingZerosL with the LoadL folded into lzcntq's memory operand;
        // used when UseCountLeadingZerosInstruction is set.
 8633   predicate(UseCountLeadingZerosInstruction);
 8634   match(Set dst (CountLeadingZerosL (LoadL src)));
 8635   effect(KILL cr);
 8636   ins_cost(175);
 8637   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8638   ins_encode %{
 8639     __ lzcntq($dst$$Register, $src$$Address);
 8640   %}
 8641   ins_pipe(ialu_reg_mem);
 8642 %}
 8643 
 8644 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
        // CountLeadingZerosL built from bsrq, used when
        // UseCountLeadingZerosInstruction is not set. Declares the flags killed.
 8645   predicate(!UseCountLeadingZerosInstruction);
 8646   match(Set dst (CountLeadingZerosL src));
 8647   effect(KILL cr);
 8648 
 8649   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8650             "jnz     skip\n\t"
 8651             "movl    $dst, -1\n"
 8652       "skip:\n\t"
 8653             "negl    $dst\n\t"
 8654             "addl    $dst, 63" %}
 8655   ins_encode %{
 8656     Register Rdst = $dst$$Register;
 8657     Register Rsrc = $src$$Register;
 8658     Label skip;
          // bsrq leaves the index of the highest set bit in Rdst; when the
          // notZero branch is not taken (zero source), Rdst is forced to -1.
 8659     __ bsrq(Rdst, Rsrc);
 8660     __ jccb(Assembler::notZero, skip);
 8661     __ movl(Rdst, -1);
 8662     __ bind(skip);
          // Result = (BitsPerLong - 1) - Rdst; the -1 case therefore yields
          // BitsPerLong.
 8663     __ negl(Rdst);
 8664     __ addl(Rdst, BitsPerLong - 1);
 8665   %}
 8666   ins_pipe(ialu_reg);
 8667 %}
 8668 
 8669 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // CountTrailingZerosI on a register using tzcntl; used when
        // UseCountTrailingZerosInstruction is set. Declares the flags killed.
 8670   predicate(UseCountTrailingZerosInstruction);
 8671   match(Set dst (CountTrailingZerosI src));
 8672   effect(KILL cr);
 8673 
 8674   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8675   ins_encode %{
 8676     __ tzcntl($dst$$Register, $src$$Register);
 8677   %}
 8678   ins_pipe(ialu_reg);
 8679 %}
 8680 
 8681 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // CountTrailingZerosI with the LoadI folded into tzcntl's memory operand;
        // used when UseCountTrailingZerosInstruction is set.
 8682   predicate(UseCountTrailingZerosInstruction);
 8683   match(Set dst (CountTrailingZerosI (LoadI src)));
 8684   effect(KILL cr);
 8685   ins_cost(175);
 8686   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8687   ins_encode %{
 8688     __ tzcntl($dst$$Register, $src$$Address);
 8689   %}
 8690   ins_pipe(ialu_reg_mem);
 8691 %}
 8692 
 8693 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
        // CountTrailingZerosI built from bsfl, used when
        // UseCountTrailingZerosInstruction is not set. Declares the flags killed.
 8694   predicate(!UseCountTrailingZerosInstruction);
 8695   match(Set dst (CountTrailingZerosI src));
 8696   effect(KILL cr);
 8697 
 8698   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8699             "jnz     done\n\t"
 8700             "movl    $dst, 32\n"
 8701       "done:" %}
 8702   ins_encode %{
 8703     Register Rdst = $dst$$Register;
 8704     Label done;
          // bsfl leaves the index of the lowest set bit in Rdst; when the
          // notZero branch is not taken (zero source), the result is set to
          // BitsPerInt explicitly.
 8705     __ bsfl(Rdst, $src$$Register);
 8706     __ jccb(Assembler::notZero, done);
 8707     __ movl(Rdst, BitsPerInt);
 8708     __ bind(done);
 8709   %}
 8710   ins_pipe(ialu_reg);
 8711 %}
 8712 
 8713 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // CountTrailingZerosL on a long register using tzcntq, producing an int
        // register result; used when UseCountTrailingZerosInstruction is set.
 8714   predicate(UseCountTrailingZerosInstruction);
 8715   match(Set dst (CountTrailingZerosL src));
 8716   effect(KILL cr);
 8717 
 8718   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8719   ins_encode %{
 8720     __ tzcntq($dst$$Register, $src$$Register);
 8721   %}
 8722   ins_pipe(ialu_reg);
 8723 %}
 8724 
 8725 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // CountTrailingZerosL with the LoadL folded into tzcntq's memory operand;
        // used when UseCountTrailingZerosInstruction is set.
 8726   predicate(UseCountTrailingZerosInstruction);
 8727   match(Set dst (CountTrailingZerosL (LoadL src)));
 8728   effect(KILL cr);
 8729   ins_cost(175);
 8730   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8731   ins_encode %{
 8732     __ tzcntq($dst$$Register, $src$$Address);
 8733   %}
 8734   ins_pipe(ialu_reg_mem);
 8735 %}
 8736 
 8737 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
        // CountTrailingZerosL built from bsfq, used when
        // UseCountTrailingZerosInstruction is not set. Declares the flags killed.
 8738   predicate(!UseCountTrailingZerosInstruction);
 8739   match(Set dst (CountTrailingZerosL src));
 8740   effect(KILL cr);
 8741 
 8742   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8743             "jnz     done\n\t"
 8744             "movl    $dst, 64\n"
 8745       "done:" %}
 8746   ins_encode %{
 8747     Register Rdst = $dst$$Register;
 8748     Label done;
          // bsfq leaves the index of the lowest set bit in Rdst; when the
          // notZero branch is not taken (zero source), the result is set to
          // BitsPerLong explicitly.
 8749     __ bsfq(Rdst, $src$$Register);
 8750     __ jccb(Assembler::notZero, done);
 8751     __ movl(Rdst, BitsPerLong);
 8752     __ bind(done);
 8753   %}
 8754   ins_pipe(ialu_reg);
 8755 %}
 8756 
 8757 //--------------- Reverse Operation Instructions ----------------
 8758 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
        // ReverseI for CPUs without GFNI support. Delegates to reverseI, passing
        // xnoreg for both XMM temporaries and one general-purpose temp.
        // dst and rtmp are TEMPs; the flags register is killed.
 8759   predicate(!VM_Version::supports_gfni());
 8760   match(Set dst (ReverseI src));
 8761   effect(TEMP dst, TEMP rtmp, KILL cr);
 8762   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8763   ins_encode %{
 8764     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8765   %}
 8766   ins_pipe( ialu_reg );
 8767 %}
 8768 
 8769 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // ReverseI for CPUs with GFNI support. Delegates to reverseI with two XMM
        // temporaries and one general-purpose temp.
        // dst and all temporaries are TEMPs; the flags register is killed.
 8770   predicate(VM_Version::supports_gfni());
 8771   match(Set dst (ReverseI src));
 8772   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8773   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8774   ins_encode %{
 8775     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8776   %}
 8777   ins_pipe( ialu_reg );
 8778 %}
 8779 
 8780 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
        // ReverseL for CPUs without GFNI support. Delegates to reverseL, passing
        // xnoreg for both XMM temporaries and two general-purpose temps.
        // dst and both temps are TEMPs; the flags register is killed.
 8781   predicate(!VM_Version::supports_gfni());
 8782   match(Set dst (ReverseL src));
 8783   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8784   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8785   ins_encode %{
 8786     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8787   %}
 8788   ins_pipe( ialu_reg );
 8789 %}
 8790 
 8791 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // ReverseL for CPUs with GFNI support. Delegates to reverseL with two XMM
        // temporaries and one general-purpose temp; noreg is passed for the
        // second general-purpose temp.
        // dst and all temporaries are TEMPs; the flags register is killed.
 8792   predicate(VM_Version::supports_gfni());
 8793   match(Set dst (ReverseL src));
 8794   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8795   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8796   ins_encode %{
 8797     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8798   %}
 8799   ins_pipe( ialu_reg );
 8800 %}
 8801 
 8802 //---------- Population Count Instructions -------------------------------------
 8803 
 8804 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // PopCountI on a register using popcntl; used when
        // UsePopCountInstruction is set. Declares the flags killed.
 8805   predicate(UsePopCountInstruction);
 8806   match(Set dst (PopCountI src));
 8807   effect(KILL cr);
 8808 
 8809   format %{ "popcnt  $dst, $src" %}
 8810   ins_encode %{
 8811     __ popcntl($dst$$Register, $src$$Register);
 8812   %}
 8813   ins_pipe(ialu_reg);
 8814 %}
 8815 
 8816 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // PopCountI with the LoadI folded into popcntl's memory operand; used
        // when UsePopCountInstruction is set. Declares the flags killed.
 8817   predicate(UsePopCountInstruction);
 8818   match(Set dst (PopCountI (LoadI mem)));
 8819   effect(KILL cr);
 8820 
 8821   format %{ "popcnt  $dst, $mem" %}
 8822   ins_encode %{
 8823     __ popcntl($dst$$Register, $mem$$Address);
 8824   %}
 8825   ins_pipe(ialu_reg);
 8826 %}
 8827 
 8828 // Note: Long.bitCount(long) returns an int.
 8829 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // PopCountL on a long register using popcntq, with the result in an int
        // register; used when UsePopCountInstruction is set.
 8830   predicate(UsePopCountInstruction);
 8831   match(Set dst (PopCountL src));
 8832   effect(KILL cr);
 8833 
 8834   format %{ "popcnt  $dst, $src" %}
 8835   ins_encode %{
 8836     __ popcntq($dst$$Register, $src$$Register);
 8837   %}
 8838   ins_pipe(ialu_reg);
 8839 %}
 8840 
 8841 // Note: Long.bitCount(long) returns an int.
 8842 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // PopCountL with the LoadL folded into popcntq's memory operand, with the
        // result in an int register; used when UsePopCountInstruction is set.
 8843   predicate(UsePopCountInstruction);
 8844   match(Set dst (PopCountL (LoadL mem)));
 8845   effect(KILL cr);
 8846 
 8847   format %{ "popcnt  $dst, $mem" %}
 8848   ins_encode %{
 8849     __ popcntq($dst$$Register, $mem$$Address);
 8850   %}
 8851   ins_pipe(ialu_reg);
 8852 %}
 8853 
 8854 
 8855 //----------MemBar Instructions-----------------------------------------------
 8856 // Memory barrier flavors
 8857 
 8858 instruct membar_acquire()
 8859 %{
        // Matches both MemBarAcquire and LoadFence. Emits no code: the encoding
        // is empty, the size is 0 and the cost is 0.
 8860   match(MemBarAcquire);
 8861   match(LoadFence);
 8862   ins_cost(0);
 8863 
 8864   size(0);
 8865   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8866   ins_encode();
 8867   ins_pipe(empty);
 8868 %}
 8869 
 8870 instruct membar_acquire_lock()
 8871 %{
        // Matches MemBarAcquireLock. Emits no code; per the format string, the
        // preceding CMPXCHG in FastLock makes a separate barrier unnecessary.
 8872   match(MemBarAcquireLock);
 8873   ins_cost(0);
 8874 
 8875   size(0);
 8876   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8877   ins_encode();
 8878   ins_pipe(empty);
 8879 %}
 8880 
 8881 instruct membar_release()
 8882 %{
        // Matches both MemBarRelease and StoreFence. Emits no code: the encoding
        // is empty, the size is 0 and the cost is 0.
 8883   match(MemBarRelease);
 8884   match(StoreFence);
 8885   ins_cost(0);
 8886 
 8887   size(0);
 8888   format %{ "MEMBAR-release ! (empty encoding)" %}
 8889   ins_encode();
 8890   ins_pipe(empty);
 8891 %}
 8892 
 8893 instruct membar_release_lock()
 8894 %{
        // Matches MemBarReleaseLock. Emits no code; per the format string, the
        // FastUnlock that follows makes a separate barrier unnecessary.
 8895   match(MemBarReleaseLock);
 8896   ins_cost(0);
 8897 
 8898   size(0);
 8899   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8900   ins_encode();
 8901   ins_pipe(empty);
 8902 %}
 8903 
 8904 instruct membar_storeload(rFlagsReg cr) %{
        // Matches MemBarStoreLoad and emits membar(Assembler::StoreLoad).
        // The format string shows it as "lock addl [rsp + #0], 0"; the flags
        // register is declared killed.
 8905   match(MemBarStoreLoad);
 8906   effect(KILL cr);
 8907   ins_cost(400);
 8908 
 8909   format %{
 8910     $$template
 8911     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8912   %}
 8913   ins_encode %{
 8914     __ membar(Assembler::StoreLoad);
 8915   %}
 8916   ins_pipe(pipe_slow);
 8917 %}
 8918 
 8919 instruct membar_volatile(rFlagsReg cr) %{
        // Matches MemBarVolatile and emits membar(Assembler::StoreLoad).
        // The format string shows it as "lock addl [rsp + #0], 0"; the flags
        // register is declared killed. Costs 400, so a zero-cost rule matching
        // the same node is preferred whenever its predicate holds.
 8920   match(MemBarVolatile);
 8921   effect(KILL cr);
 8922   ins_cost(400);
 8923 
 8924   format %{
 8925     $$template
 8926     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8927   %}
 8928   ins_encode %{
 8929     __ membar(Assembler::StoreLoad);
 8930   %}
 8931   ins_pipe(pipe_slow);
 8932 %}
 8933 
 8934 instruct unnecessary_membar_volatile()
 8935 %{
        // Zero-cost, zero-size match for MemBarVolatile that emits nothing.
        // Applies only when Matcher::post_store_load_barrier(n) returns true.
 8936   match(MemBarVolatile);
 8937   predicate(Matcher::post_store_load_barrier(n));
 8938   ins_cost(0);
 8939 
 8940   size(0);
 8941   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8942   ins_encode();
 8943   ins_pipe(empty);
 8944 %}
 8945 
 8946 instruct membar_full(rFlagsReg cr) %{
        // Matches MemBarFull and emits membar(Assembler::StoreLoad).
        // The format string shows it as "lock addl [rsp + #0], 0"; the flags
        // register is declared killed.
 8947   match(MemBarFull);
 8948   effect(KILL cr);
 8949   ins_cost(400);
 8950 
 8951   format %{
 8952     $$template
 8953     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8954   %}
 8955   ins_encode %{
 8956     __ membar(Assembler::StoreLoad);
 8957   %}
 8958   ins_pipe(pipe_slow);
 8959 %}
 8960 
 8961 instruct membar_storestore() %{
        // Matches both MemBarStoreStore and StoreStoreFence. Emits no code: the
        // encoding is empty, the size is 0 and the cost is 0.
 8962   match(MemBarStoreStore);
 8963   match(StoreStoreFence);
 8964   ins_cost(0);
 8965 
 8966   size(0);
 8967   format %{ "MEMBAR-storestore (empty encoding)" %}
 8968   ins_encode( );
 8969   ins_pipe(empty);
 8970 %}
 8971 
 8972 //----------Move Instructions--------------------------------------------------
 8973 
 8974 instruct castX2P(rRegP dst, rRegL src)
 8975 %{
        // CastX2P: reinterpret a long register as a pointer register.
 8976   match(Set dst (CastX2P src));
 8977 
 8978   format %{ "movq    $dst, $src\t# long->ptr" %}
 8979   ins_encode %{
          // Nothing is emitted when both operands were allocated the same
          // register.
 8980     if ($dst$$reg != $src$$reg) {
 8981       __ movptr($dst$$Register, $src$$Register);
 8982     }
 8983   %}
 8984   ins_pipe(ialu_reg_reg); // XXX
 8985 %}
 8986 
 8987 instruct castI2N(rRegN dst, rRegI src)
 8988 %{
        // CastI2N: reinterpret an int register as a narrow-pointer register
        // using a 32-bit register move.
 8989   match(Set dst (CastI2N src));
 8990 
 8991   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 8992   ins_encode %{
          // Nothing is emitted when both operands were allocated the same
          // register.
 8993     if ($dst$$reg != $src$$reg) {
 8994       __ movl($dst$$Register, $src$$Register);
 8995     }
 8996   %}
 8997   ins_pipe(ialu_reg_reg); // XXX
 8998 %}
 8999 
 9000 instruct castN2X(rRegL dst, rRegN src)
 9001 %{
        // CastP2X whose source is a narrow-pointer register (rRegN); the result
        // is a long register.
        // NOTE(review): the format text below reads "ptr -> long", identical to
        // the rRegP variant, although the source operand here is rRegN.
 9002   match(Set dst (CastP2X src));
 9003 
 9004   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9005   ins_encode %{
          // Nothing is emitted when both operands were allocated the same
          // register.
 9006     if ($dst$$reg != $src$$reg) {
 9007       __ movptr($dst$$Register, $src$$Register);
 9008     }
 9009   %}
 9010   ins_pipe(ialu_reg_reg); // XXX
 9011 %}
 9012 
 9013 instruct castP2X(rRegL dst, rRegP src)
 9014 %{
        // CastP2X: reinterpret a pointer register as a long register.
 9015   match(Set dst (CastP2X src));
 9016 
 9017   format %{ "movq    $dst, $src\t# ptr -> long" %}
 9018   ins_encode %{
          // Nothing is emitted when both operands were allocated the same
          // register.
 9019     if ($dst$$reg != $src$$reg) {
 9020       __ movptr($dst$$Register, $src$$Register);
 9021     }
 9022   %}
 9023   ins_pipe(ialu_reg_reg); // XXX
 9024 %}
 9025 
 9026 // Convert oop into int for vectors alignment masking
 9027 instruct convP2I(rRegI dst, rRegP src)
 9028 %{
        // Collapses ConvL2I(CastP2X src) into one 32-bit register move. The movl
        // is emitted unconditionally, even if dst and src share a register.
 9029   match(Set dst (ConvL2I (CastP2X src)));
 9030 
 9031   format %{ "movl    $dst, $src\t# ptr -> int" %}
 9032   ins_encode %{
 9033     __ movl($dst$$Register, $src$$Register);
 9034   %}
 9035   ins_pipe(ialu_reg_reg); // XXX
 9036 %}
 9037 
 9038 // Convert compressed oop into int for vectors alignment masking
 9039 // in case of 32bit oops (heap < 4Gb).
 9040 instruct convN2I(rRegI dst, rRegN src)
 9041 %{
        // Collapses ConvL2I(CastP2X(DecodeN src)) into one 32-bit register move
        // of the still-compressed value. Only valid when the compressed-oop
        // shift is 0.
 9042   predicate(CompressedOops::shift() == 0);
 9043   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 9044 
 9045   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 9046   ins_encode %{
 9047     __ movl($dst$$Register, $src$$Register);
 9048   %}
 9049   ins_pipe(ialu_reg_reg); // XXX
 9050 %}
 9051 
 9052 // Convert oop pointer into compressed form
 9053 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodeP for results whose pointer type is not TypePtr::NotNull.
        // Declares the flags register killed.
 9054   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 9055   match(Set dst (EncodeP src));
 9056   effect(KILL cr);
 9057   format %{ "encode_heap_oop $dst,$src" %}
 9058   ins_encode %{
 9059     Register s = $src$$Register;
 9060     Register d = $dst$$Register;
          // encode_heap_oop works in place on d, so src is copied into dst first
          // unless they are already the same register.
 9061     if (s != d) {
 9062       __ movq(d, s);
 9063     }
 9064     __ encode_heap_oop(d);
 9065   %}
 9066   ins_pipe(ialu_reg_long);
 9067 %}
 9068 
 9069 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodeP for results whose pointer type is TypePtr::NotNull; uses the
        // two-register encode_heap_oop_not_null. Declares the flags killed.
 9070   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 9071   match(Set dst (EncodeP src));
 9072   effect(KILL cr);
 9073   format %{ "encode_heap_oop_not_null $dst,$src" %}
 9074   ins_encode %{
 9075     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 9076   %}
 9077   ins_pipe(ialu_reg_long);
 9078 %}
 9079 
 9080 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeN for results whose pointer type is neither TypePtr::NotNull nor
        // TypePtr::Constant. Declares the flags register killed.
 9081   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9082             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9083   match(Set dst (DecodeN src));
 9084   effect(KILL cr);
 9085   format %{ "decode_heap_oop $dst,$src" %}
 9086   ins_encode %{
 9087     Register s = $src$$Register;
 9088     Register d = $dst$$Register;
          // decode_heap_oop works in place on d, so src is copied into dst first
          // unless they are already the same register.
 9089     if (s != d) {
 9090       __ movq(d, s);
 9091     }
 9092     __ decode_heap_oop(d);
 9093   %}
 9094   ins_pipe(ialu_reg_long);
 9095 %}
 9096 
 9097 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeN for results whose pointer type is TypePtr::NotNull or
        // TypePtr::Constant. Declares the flags register killed.
 9098   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9099             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9100   match(Set dst (DecodeN src));
 9101   effect(KILL cr);
 9102   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9103   ins_encode %{
 9104     Register s = $src$$Register;
 9105     Register d = $dst$$Register;
          // Use the two-register form when src and dst differ, and the in-place
          // single-register form when they coincide.
 9106     if (s != d) {
 9107       __ decode_heap_oop_not_null(d, s);
 9108     } else {
 9109       __ decode_heap_oop_not_null(d);
 9110     }
 9111   %}
 9112   ins_pipe(ialu_reg_long);
 9113 %}
 9114 
 9115 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodePKlass via encode_and_move_klass_not_null. dst is declared TEMP
        // and the flags register is killed.
 9116   match(Set dst (EncodePKlass src));
 9117   effect(TEMP dst, KILL cr);
 9118   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9119   ins_encode %{
 9120     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9121   %}
 9122   ins_pipe(ialu_reg_long);
 9123 %}
 9124 
 9125 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeNKlass via decode_and_move_klass_not_null. dst is declared TEMP
        // and the flags register is killed.
 9126   match(Set dst (DecodeNKlass src));
 9127   effect(TEMP dst, KILL cr);
 9128   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9129   ins_encode %{
 9130     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9131   %}
 9132   ins_pipe(ialu_reg_long);
 9133 %}
 9134 
 9135 //----------Conditional Move---------------------------------------------------
 9136 // Jump
 9137 // dummy instruction for generating temp registers
 9138 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
        // Jump on a left-shifted switch value. predicate(false) means this rule
        // is never selected by the matcher.
        // The encoding loads $constantaddress into the TEMP dest register and
        // jumps through [dest + switch_val << shift].
 9139   match(Jump (LShiftL switch_val shift));
 9140   ins_cost(350);
 9141   predicate(false);
 9142   effect(TEMP dest);
 9143 
 9144   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9145             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9146   ins_encode %{
 9147     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9148     // to do that and the compiler is using that register as one it can allocate.
 9149     // So we build it all by hand.
 9150     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9151     // ArrayAddress dispatch(table, index);
 9152     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9153     __ lea($dest$$Register, $constantaddress);
 9154     __ jmp(dispatch);
 9155   %}
 9156   ins_pipe(pipe_jmp);
 9157 %}
 9158 
 9159 instruct jumpXtnd_addr(rRegL switch_val, immL32 offset, rRegI dest) %{
        // Jump on (switch_val << shift) + offset. The encoding loads
        // $constantaddress into the TEMP dest register and jumps through
        // [dest + switch_val << shift + offset]; the offset constant is
        // narrowed to int for the displacement.
 9160   match(Jump (AddL (LShiftL switch_val shift) offset));
 9161   ins_cost(350);
 9162   effect(TEMP dest);
 9163 
 9164   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9165             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9166   ins_encode %{
 9167     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9168     // to do that and the compiler is using that register as one it can allocate.
 9169     // So we build it all by hand.
 9170     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9171     // ArrayAddress dispatch(table, index);
 9172     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9173     __ lea($dest$$Register, $constantaddress);
 9174     __ jmp(dispatch);
 9175   %}
 9176   ins_pipe(pipe_jmp);
 9177 %}
 9178 
 9179 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
        // Jump on an unscaled switch value. The encoding loads $constantaddress
        // into the TEMP dest register and jumps through [dest + switch_val]
        // (scale factor times_1).
 9180   match(Jump switch_val);
 9181   ins_cost(350);
 9182   effect(TEMP dest);
 9183 
 9184   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9185             "jmp     [$dest + $switch_val]\n\t" %}
 9186   ins_encode %{
 9187     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9188     // to do that and the compiler is using that register as one it can allocate.
 9189     // So we build it all by hand.
 9190     // Address index(noreg, switch_reg, Address::times_1);
 9191     // ArrayAddress dispatch(table, index);
 9192     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9193     __ lea($dest$$Register, $constantaddress);
 9194     __ jmp(dispatch);
 9195   %}
 9196   ins_pipe(pipe_jmp);
 9197 %}
 9198 
 9199 // Conditional move
 9200 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9201 %{
        // CMoveI between the constants 1 (src, immI_1) and 0: the predicate
        // requires the second value input to be the int constant 0. Instead of
        // a cmov, the 0/1 result is produced by setb on the negated condition.
 9202   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9203   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9204 
 9205   ins_cost(100); // XXX
 9206   format %{ "setbn$cop $dst\t# signed, int" %}
 9207   ins_encode %{
 9208     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9209     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9210   %}
 9211   ins_pipe(ialu_reg);
 9212 %}
 9213 
 9214 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9215 %{
        // Int conditional move for cmpOp/rFlagsReg conditions, two-operand form:
        // dst is both an input and the result. Used when UseAPX is off.
 9216   predicate(!UseAPX);
 9217   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9218 
 9219   ins_cost(200); // XXX
 9220   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9221   ins_encode %{
 9222     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9223   %}
 9224   ins_pipe(pipe_cmov_reg);
 9225 %}
 9226 
 9227 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9228 %{
        // Int conditional move for cmpOp/rFlagsReg conditions, three-operand
        // form (ecmovl) with a destination separate from both sources.
        // Used when UseAPX is on.
 9229   predicate(UseAPX);
 9230   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9231 
 9232   ins_cost(200);
 9233   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9234   ins_encode %{
 9235     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9236   %}
 9237   ins_pipe(pipe_cmov_reg);
 9238 %}
 9239 
 9240 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9241 %{
        // cmpOpU/rFlagsRegU variant of the 0/1 CMoveI: the predicate requires
        // the second value input to be the int constant 0, and the result is
        // produced by setb on the negated condition.
 9242   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9243   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9244 
 9245   ins_cost(100); // XXX
 9246   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9247   ins_encode %{
 9248     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9249     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9250   %}
 9251   ins_pipe(ialu_reg);
 9252 %}
 9253 
 9254 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9255   predicate(!UseAPX);
 9256   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9257 
 9258   ins_cost(200); // XXX
 9259   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9260   ins_encode %{
 9261     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9262   %}
 9263   ins_pipe(pipe_cmov_reg);
 9264 %}
 9265 
 9266 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9267   predicate(UseAPX);
 9268   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9269 
 9270   ins_cost(200);
 9271   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9272   ins_encode %{
 9273     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9274   %}
 9275   ins_pipe(pipe_cmov_reg);
 9276 %}
 9277 
 9278 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9279 %{
 9280   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9281   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9282 
 9283   ins_cost(100); // XXX
 9284   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9285   ins_encode %{
 9286     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9287     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9288   %}
 9289   ins_pipe(ialu_reg);
 9290 %}
 9291 
 9292 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9293 %{
 9294   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9295   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9296 
 9297   ins_cost(100); // XXX
 9298   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9299   ins_encode %{
 9300     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9301     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9302   %}
 9303   ins_pipe(ialu_reg);
 9304 %}
 9305 
 9306 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9307   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9308 
 9309   ins_cost(200);
 9310   expand %{
 9311     cmovI_regU(cop, cr, dst, src);
 9312   %}
 9313 %}
 9314 
 9315 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9316   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9317 
 9318   ins_cost(200);
 9319   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9320   ins_encode %{
 9321     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9322   %}
 9323   ins_pipe(pipe_cmov_reg);
 9324 %}
 9325 
 9326 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9327   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9328   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9329 
 9330   ins_cost(200); // XXX
 9331   format %{ "cmovpl  $dst, $src\n\t"
 9332             "cmovnel $dst, $src" %}
 9333   ins_encode %{
 9334     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9335     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9336   %}
 9337   ins_pipe(pipe_cmov_reg);
 9338 %}
 9339 
 9340 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9341 // inputs of the CMove
 9342 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9343   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9344   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9345   effect(TEMP dst);
 9346 
 9347   ins_cost(200); // XXX
 9348   format %{ "cmovpl  $dst, $src\n\t"
 9349             "cmovnel $dst, $src" %}
 9350   ins_encode %{
 9351     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9352     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9353   %}
 9354   ins_pipe(pipe_cmov_reg);
 9355 %}
 9356 
 9357 // Conditional move
 9358 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9359   predicate(!UseAPX);
 9360   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9361 
 9362   ins_cost(250); // XXX
 9363   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9364   ins_encode %{
 9365     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9366   %}
 9367   ins_pipe(pipe_cmov_mem);
 9368 %}
 9369 
 9370 // Conditional move
 9371 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9372 %{
 9373   predicate(UseAPX);
 9374   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9375 
 9376   ins_cost(250);
 9377   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9378   ins_encode %{
 9379     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9380   %}
 9381   ins_pipe(pipe_cmov_mem);
 9382 %}
 9383 
 9384 // Conditional move
 9385 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9386 %{
 9387   predicate(!UseAPX);
 9388   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9389 
 9390   ins_cost(250); // XXX
 9391   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9392   ins_encode %{
 9393     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9394   %}
 9395   ins_pipe(pipe_cmov_mem);
 9396 %}
 9397 
 9398 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9399   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9400 
 9401   ins_cost(250);
 9402   expand %{
 9403     cmovI_memU(cop, cr, dst, src);
 9404   %}
 9405 %}
 9406 
 9407 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9408 %{
 9409   predicate(UseAPX);
 9410   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9411 
 9412   ins_cost(250);
 9413   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9414   ins_encode %{
 9415     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9416   %}
 9417   ins_pipe(pipe_cmov_mem);
 9418 %}
 9419 
 9420 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9421 %{
 9422   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9423 
 9424   ins_cost(250);
 9425   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9426   ins_encode %{
 9427     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9428   %}
 9429   ins_pipe(pipe_cmov_mem);
 9430 %}
 9431 
 9432 // Conditional move
 9433 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9434 %{
 9435   predicate(!UseAPX);
 9436   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9437 
 9438   ins_cost(200); // XXX
 9439   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9440   ins_encode %{
 9441     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9442   %}
 9443   ins_pipe(pipe_cmov_reg);
 9444 %}
 9445 
 9446 // Conditional move ndd
 9447 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9448 %{
 9449   predicate(UseAPX);
 9450   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9451 
 9452   ins_cost(200);
 9453   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9454   ins_encode %{
 9455     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9456   %}
 9457   ins_pipe(pipe_cmov_reg);
 9458 %}
 9459 
 9460 // Conditional move
 9461 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9462 %{
 9463   predicate(!UseAPX);
 9464   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9465 
 9466   ins_cost(200); // XXX
 9467   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9468   ins_encode %{
 9469     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9470   %}
 9471   ins_pipe(pipe_cmov_reg);
 9472 %}
 9473 
 9474 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9475   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9476 
 9477   ins_cost(200);
 9478   expand %{
 9479     cmovN_regU(cop, cr, dst, src);
 9480   %}
 9481 %}
 9482 
 9483 // Conditional move ndd
 9484 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9485 %{
 9486   predicate(UseAPX);
 9487   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9488 
 9489   ins_cost(200);
 9490   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9491   ins_encode %{
 9492     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9493   %}
 9494   ins_pipe(pipe_cmov_reg);
 9495 %}
 9496 
 9497 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9498   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9499 
 9500   ins_cost(200);
 9501   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9502   ins_encode %{
 9503     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9504   %}
 9505   ins_pipe(pipe_cmov_reg);
 9506 %}
 9507 
 9508 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9509   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9510   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9511 
 9512   ins_cost(200); // XXX
 9513   format %{ "cmovpl  $dst, $src\n\t"
 9514             "cmovnel $dst, $src" %}
 9515   ins_encode %{
 9516     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9517     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9518   %}
 9519   ins_pipe(pipe_cmov_reg);
 9520 %}
 9521 
 9522 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9523 // inputs of the CMove
 9524 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9525   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9526   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9527 
 9528   ins_cost(200); // XXX
 9529   format %{ "cmovpl  $dst, $src\n\t"
 9530             "cmovnel $dst, $src" %}
 9531   ins_encode %{
 9532     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9533     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9534   %}
 9535   ins_pipe(pipe_cmov_reg);
 9536 %}
 9537 
 9538 // Conditional move
 9539 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9540 %{
 9541   predicate(!UseAPX);
 9542   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9543 
 9544   ins_cost(200); // XXX
 9545   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9546   ins_encode %{
 9547     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9548   %}
 9549   ins_pipe(pipe_cmov_reg);  // XXX
 9550 %}
 9551 
 9552 // Conditional move ndd
 9553 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9554 %{
 9555   predicate(UseAPX);
 9556   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9557 
 9558   ins_cost(200);
 9559   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9560   ins_encode %{
 9561     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9562   %}
 9563   ins_pipe(pipe_cmov_reg);
 9564 %}
 9565 
 9566 // Conditional move
 9567 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9568 %{
 9569   predicate(!UseAPX);
 9570   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9571 
 9572   ins_cost(200); // XXX
 9573   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9574   ins_encode %{
 9575     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9576   %}
 9577   ins_pipe(pipe_cmov_reg); // XXX
 9578 %}
 9579 
 9580 // Conditional move ndd
 9581 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9582 %{
 9583   predicate(UseAPX);
 9584   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9585 
 9586   ins_cost(200);
 9587   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9588   ins_encode %{
 9589     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9590   %}
 9591   ins_pipe(pipe_cmov_reg);
 9592 %}
 9593 
 9594 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9595   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9596 
 9597   ins_cost(200);
 9598   expand %{
 9599     cmovP_regU(cop, cr, dst, src);
 9600   %}
 9601 %}
 9602 
 9603 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9604   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9605 
 9606   ins_cost(200);
 9607   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9608   ins_encode %{
 9609     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9610   %}
 9611   ins_pipe(pipe_cmov_reg);
 9612 %}
 9613 
 9614 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9615   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9616   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9617 
 9618   ins_cost(200); // XXX
 9619   format %{ "cmovpq  $dst, $src\n\t"
 9620             "cmovneq $dst, $src" %}
 9621   ins_encode %{
 9622     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9623     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9624   %}
 9625   ins_pipe(pipe_cmov_reg);
 9626 %}
 9627 
 9628 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9629 // inputs of the CMove
 9630 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9631   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9632   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9633 
 9634   ins_cost(200); // XXX
 9635   format %{ "cmovpq  $dst, $src\n\t"
 9636             "cmovneq $dst, $src" %}
 9637   ins_encode %{
 9638     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9639     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9640   %}
 9641   ins_pipe(pipe_cmov_reg);
 9642 %}
 9643 
 9644 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9645 %{
 9646   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9647   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9648 
 9649   ins_cost(100); // XXX
 9650   format %{ "setbn$cop $dst\t# signed, long" %}
 9651   ins_encode %{
 9652     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9653     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9654   %}
 9655   ins_pipe(ialu_reg);
 9656 %}
 9657 
 9658 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9659 %{
 9660   predicate(!UseAPX);
 9661   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9662 
 9663   ins_cost(200); // XXX
 9664   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9665   ins_encode %{
 9666     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9667   %}
 9668   ins_pipe(pipe_cmov_reg);  // XXX
 9669 %}
 9670 
 9671 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9672 %{
 9673   predicate(UseAPX);
 9674   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9675 
 9676   ins_cost(200);
 9677   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9678   ins_encode %{
 9679     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9680   %}
 9681   ins_pipe(pipe_cmov_reg);
 9682 %}
 9683 
 9684 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9685 %{
 9686   predicate(!UseAPX);
 9687   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9688 
 9689   ins_cost(200); // XXX
 9690   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9691   ins_encode %{
 9692     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9693   %}
 9694   ins_pipe(pipe_cmov_mem);  // XXX
 9695 %}
 9696 
 9697 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9698 %{
 9699   predicate(UseAPX);
 9700   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9701 
 9702   ins_cost(200);
 9703   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9704   ins_encode %{
 9705     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9706   %}
 9707   ins_pipe(pipe_cmov_mem);
 9708 %}
 9709 
 9710 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9711 %{
 9712   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9713   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9714 
 9715   ins_cost(100); // XXX
 9716   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9717   ins_encode %{
 9718     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9719     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9720   %}
 9721   ins_pipe(ialu_reg);
 9722 %}
 9723 
 9724 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9725 %{
 9726   predicate(!UseAPX);
 9727   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9728 
 9729   ins_cost(200); // XXX
 9730   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9731   ins_encode %{
 9732     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9733   %}
 9734   ins_pipe(pipe_cmov_reg); // XXX
 9735 %}
 9736 
 9737 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9738 %{
 9739   predicate(UseAPX);
 9740   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9741 
 9742   ins_cost(200);
 9743   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9744   ins_encode %{
 9745     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9746   %}
 9747   ins_pipe(pipe_cmov_reg);
 9748 %}
 9749 
 9750 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9751 %{
 9752   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9753   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9754 
 9755   ins_cost(100); // XXX
 9756   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9757   ins_encode %{
 9758     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9759     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9760   %}
 9761   ins_pipe(ialu_reg);
 9762 %}
 9763 
 9764 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9765 %{
 9766   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9767   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9768 
 9769   ins_cost(100); // XXX
 9770   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9771   ins_encode %{
 9772     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9773     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9774   %}
 9775   ins_pipe(ialu_reg);
 9776 %}
 9777 
 9778 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9779   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9780 
 9781   ins_cost(200);
 9782   expand %{
 9783     cmovL_regU(cop, cr, dst, src);
 9784   %}
 9785 %}
 9786 
 9787 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9788 %{
 9789   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9790 
 9791   ins_cost(200);
 9792   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9793   ins_encode %{
 9794     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9795   %}
 9796   ins_pipe(pipe_cmov_reg);
 9797 %}
 9798 
 9799 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9800   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9801   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9802 
 9803   ins_cost(200); // XXX
 9804   format %{ "cmovpq  $dst, $src\n\t"
 9805             "cmovneq $dst, $src" %}
 9806   ins_encode %{
 9807     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9808     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9809   %}
 9810   ins_pipe(pipe_cmov_reg);
 9811 %}
 9812 
 9813 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9814 // inputs of the CMove
 9815 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9816   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9817   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9818 
 9819   ins_cost(200); // XXX
 9820   format %{ "cmovpq  $dst, $src\n\t"
 9821             "cmovneq $dst, $src" %}
 9822   ins_encode %{
 9823     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9824     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9825   %}
 9826   ins_pipe(pipe_cmov_reg);
 9827 %}
 9828 
 9829 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9830 %{
 9831   predicate(!UseAPX);
 9832   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9833 
 9834   ins_cost(200); // XXX
 9835   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9836   ins_encode %{
 9837     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9838   %}
 9839   ins_pipe(pipe_cmov_mem); // XXX
 9840 %}
 9841 
 9842 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9843   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9844 
 9845   ins_cost(200);
 9846   expand %{
 9847     cmovL_memU(cop, cr, dst, src);
 9848   %}
 9849 %}
 9850 
 9851 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9852 %{
 9853   predicate(UseAPX);
 9854   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9855 
 9856   ins_cost(200);
 9857   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9858   ins_encode %{
 9859     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9860   %}
 9861   ins_pipe(pipe_cmov_mem);
 9862 %}
 9863 
 9864 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9865 %{
 9866   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9867 
 9868   ins_cost(200);
 9869   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9870   ins_encode %{
 9871     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9872   %}
 9873   ins_pipe(pipe_cmov_mem);
 9874 %}
 9875 
 9876 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9877 %{
 9878   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9879 
 9880   ins_cost(200); // XXX
 9881   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9882             "movss     $dst, $src\n"
 9883     "skip:" %}
 9884   ins_encode %{
 9885     Label Lskip;
 9886     // Invert sense of branch from sense of CMOV
 9887     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9888     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9889     __ bind(Lskip);
 9890   %}
 9891   ins_pipe(pipe_slow);
 9892 %}
 9893 
 9894 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9895 %{
 9896   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9897 
 9898   ins_cost(200); // XXX
 9899   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9900             "movss     $dst, $src\n"
 9901     "skip:" %}
 9902   ins_encode %{
 9903     Label Lskip;
 9904     // Invert sense of branch from sense of CMOV
 9905     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9906     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9907     __ bind(Lskip);
 9908   %}
 9909   ins_pipe(pipe_slow);
 9910 %}
 9911 
 9912 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9913   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9914 
 9915   ins_cost(200);
 9916   expand %{
 9917     cmovF_regU(cop, cr, dst, src);
 9918   %}
 9919 %}
 9920 
 9921 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9922 %{
 9923   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9924 
 9925   ins_cost(200); // XXX
 9926   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9927             "movss     $dst, $src\n"
 9928     "skip:" %}
 9929   ins_encode %{
 9930     Label Lskip;
 9931     // Invert sense of branch from sense of CMOV
 9932     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9933     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9934     __ bind(Lskip);
 9935   %}
 9936   ins_pipe(pipe_slow);
 9937 %}
 9938 
 9939 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9940 %{
 9941   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9942 
 9943   ins_cost(200); // XXX
 9944   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9945             "movsd     $dst, $src\n"
 9946     "skip:" %}
 9947   ins_encode %{
 9948     Label Lskip;
 9949     // Invert sense of branch from sense of CMOV
 9950     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9951     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9952     __ bind(Lskip);
 9953   %}
 9954   ins_pipe(pipe_slow);
 9955 %}
 9956 
 9957 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9958 %{
 9959   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9960 
 9961   ins_cost(200); // XXX
 9962   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9963             "movsd     $dst, $src\n"
 9964     "skip:" %}
 9965   ins_encode %{
 9966     Label Lskip;
 9967     // Invert sense of branch from sense of CMOV
 9968     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9969     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9970     __ bind(Lskip);
 9971   %}
 9972   ins_pipe(pipe_slow);
 9973 %}
 9974 
 9975 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9976   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9977 
 9978   ins_cost(200);
 9979   expand %{
 9980     cmovD_regU(cop, cr, dst, src);
 9981   %}
 9982 %}
 9983 
 9984 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9985 %{
 9986   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9987 
 9988   ins_cost(200); // XXX
 9989   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9990             "movsd     $dst, $src\n"
 9991     "skip:" %}
 9992   ins_encode %{
 9993     Label Lskip;
 9994     // Invert sense of branch from sense of CMOV
 9995     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9996     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9997     __ bind(Lskip);
 9998   %}
 9999   ins_pipe(pipe_slow);
10000 %}
10001 
10002 //----------Arithmetic Instructions--------------------------------------------
10003 //----------Addition Instructions----------------------------------------------
10004 
// Two-operand int add, register += register (non-APX). The flags register is
// declared killed, and the rule records that the instruction sets the
// overflow, sign, zero, carry and parity flags.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ addl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
10017 
// APX three-operand int add, dst = src1 + src2, all in registers. The flags
// register is declared killed. Both source operands carry the
// ndd_demotable flag.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ eaddl($dst$$Register, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10031 
// Two-operand int add of an immediate to a register (non-APX). The flags
// register is declared killed, and the rule records that the instruction
// sets the overflow, sign, zero, carry and parity flags.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ addl(dst_reg, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10045 
// APX three-operand int add of an immediate: dst = src1 + src2, with src2 a
// constant. The flags register is declared killed. Only operand 1 carries
// the ndd_demotable flag.
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddl(dst_reg, lhs, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10059 
// APX three-operand int add of an immediate to a loaded value:
// dst = [src1] + src2, with the LoadI folded into the instruction. The flags
// register is declared killed. No ndd_demotable flag is set on this rule.
instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ eaddl(dst_reg, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10073 
// Two-operand int add of a loaded value to a register (non-APX), with the
// LoadI folded into the instruction as a memory operand. The flags register
// is declared killed.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ addl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10088 
// APX three-operand int add of a loaded value: dst = src1 + [src2], with the
// LoadI folded into the instruction as a memory operand. The flags register
// is declared killed.
//
// Only operand 1 is flagged ndd_demotable. Operand 2 is a memory operand, so
// $dst can never be the same register as it; this mirrors
// addI_rReg_rReg_imm_ndd, whose non-register second operand is likewise not
// flagged.
// NOTE(review): assumes Flag_ndd_demotable_oprN marks a register source that
// $dst may be allocated to share -- confirm against the flag's consumers.
instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  ins_cost(150);
  format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10103 
// Read-modify-write int add to memory: [dst] = LoadI([dst]) + src.
// The load, add and store of the same address are matched as one
// `addl mem, reg`. There is no predicate, so the rule is available with
// or without UseAPX. Kills the flags register; tagged as setting the
// overflow, sign, zero, carry and parity flags.
10104 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10105 %{
10106   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10107   effect(KILL cr);
10108   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10109 
10110   ins_cost(150); // XXX
10111   format %{ "addl    $dst, $src\t# int" %}
10112   ins_encode %{
10113     __ addl($dst$$Address, $src$$Register);
10114   %}
10115   ins_pipe(ialu_mem_reg);
10116 %}
10117 
// Read-modify-write int add of an immediate to memory:
// [dst] = LoadI([dst]) + src, emitted as one `addl mem, imm`.
// No predicate. Kills the flags register; tagged as setting the
// overflow, sign, zero, carry and parity flags. Cost 125 is lower than
// the register-source variant's 150.
10118 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10119 %{
10120   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10121   effect(KILL cr);
10122   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10123 
10124 
10125   ins_cost(125); // XXX
10126   format %{ "addl    $dst, $src\t# int" %}
10127   ins_encode %{
10128     __ addl($dst$$Address, $src$$constant);
10129   %}
10130   ins_pipe(ialu_mem_imm);
10131 %}
10132 
// In-place int increment: dst = dst + src, where src is an immI_1 operand
// (presumably the constant 1; the operand is defined elsewhere).
// Selected only when UseAPX is off and UseIncDec is on. The encoding goes
// through incrementl(dst); the flags register is killed.
10133 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10134 %{
10135   predicate(!UseAPX && UseIncDec);
10136   match(Set dst (AddI dst src));
10137   effect(KILL cr);
10138 
10139   format %{ "incl    $dst\t# int" %}
10140   ins_encode %{
10141     __ incrementl($dst$$Register);
10142   %}
10143   ins_pipe(ialu_reg);
10144 %}
10145 
// Int increment into a separate destination: dst = src + val, where val
// is an immI_1 operand. Requires both UseAPX and UseIncDec. Emits
// eincl dst, src and kills the flags register.
// The node carries PD::Flag_ndd_demotable_opr1.
// NOTE(review): presumably that flag lets later phases favour dst == src
// so a shorter encoding can be used -- confirm.
10146 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10147 %{
10148   predicate(UseAPX && UseIncDec);
10149   match(Set dst (AddI src val));
10150   effect(KILL cr);
10151   flag(PD::Flag_ndd_demotable_opr1);
10152 
10153   format %{ "eincl    $dst, $src\t# int ndd" %}
10154   ins_encode %{
10155     __ eincl($dst$$Register, $src$$Register, false);
10156   %}
10157   ins_pipe(ialu_reg);
10158 %}
10159 
// Int increment of a loaded value into a register:
// dst = LoadI(src) + val, where val is an immI_1 operand.
// Requires both UseAPX and UseIncDec. The load is folded into a single
// eincl with a memory source; the flags register is killed.
10160 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10161 %{
10162   predicate(UseAPX && UseIncDec);
10163   match(Set dst (AddI (LoadI src) val));
10164   effect(KILL cr);
10165 
10166   format %{ "eincl    $dst, $src\t# int ndd" %}
10167   ins_encode %{
10168     __ eincl($dst$$Register, $src$$Address, false);
10169   %}
10170   ins_pipe(ialu_reg);
10171 %}
10172 
// Read-modify-write int increment of memory:
// [dst] = LoadI([dst]) + src, where src is an immI_1 operand.
// Requires UseIncDec (independent of UseAPX). The encoding goes through
// incrementl on the address; the flags register is killed.
10173 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10174 %{
10175   predicate(UseIncDec);
10176   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10177   effect(KILL cr);
10178 
10179   ins_cost(125); // XXX
10180   format %{ "incl    $dst\t# int" %}
10181   ins_encode %{
10182     __ incrementl($dst$$Address);
10183   %}
10184   ins_pipe(ialu_mem_imm);
10185 %}
10186 
10187 // XXX why does that use AddI
// In-place int decrement, expressed as an add: dst = dst + src, where src
// is an immI_M1 operand (presumably the constant -1; defined elsewhere).
// Selected only when UseAPX is off and UseIncDec is on. The encoding goes
// through decrementl(dst); the flags register is killed.
10188 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10189 %{
10190   predicate(!UseAPX && UseIncDec);
10191   match(Set dst (AddI dst src));
10192   effect(KILL cr);
10193 
10194   format %{ "decl    $dst\t# int" %}
10195   ins_encode %{
10196     __ decrementl($dst$$Register);
10197   %}
10198   ins_pipe(ialu_reg);
10199 %}
10200 
// Int decrement into a separate destination: dst = src + val, where val
// is an immI_M1 operand. Requires both UseAPX and UseIncDec. Emits
// edecl dst, src and kills the flags register.
// The node carries PD::Flag_ndd_demotable_opr1.
10201 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10202 %{
10203   predicate(UseAPX && UseIncDec);
10204   match(Set dst (AddI src val));
10205   effect(KILL cr);
10206   flag(PD::Flag_ndd_demotable_opr1);
10207 
10208   format %{ "edecl    $dst, $src\t# int ndd" %}
10209   ins_encode %{
10210     __ edecl($dst$$Register, $src$$Register, false);
10211   %}
10212   ins_pipe(ialu_reg);
10213 %}
10214 
// Int decrement of a loaded value into a register:
// dst = LoadI(src) + val, where val is an immI_M1 operand.
// Requires both UseAPX and UseIncDec. The load is folded into a single
// edecl with a memory source; the flags register is killed.
10215 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10216 %{
10217   predicate(UseAPX && UseIncDec);
10218   match(Set dst (AddI (LoadI src) val));
10219   effect(KILL cr);
10220 
10221   format %{ "edecl    $dst, $src\t# int ndd" %}
10222   ins_encode %{
10223     __ edecl($dst$$Register, $src$$Address, false);
10224   %}
10225   ins_pipe(ialu_reg);
10226 %}
10227 
10228 // XXX why does that use AddI
// Read-modify-write int decrement of memory, expressed as an add:
// [dst] = LoadI([dst]) + src, where src is an immI_M1 operand.
// Requires UseIncDec (independent of UseAPX). The encoding goes through
// decrementl on the address; the flags register is killed.
10229 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10230 %{
10231   predicate(UseIncDec);
10232   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10233   effect(KILL cr);
10234 
10235   ins_cost(125); // XXX
10236   format %{ "decl    $dst\t# int" %}
10237   ins_encode %{
10238     __ decrementl($dst$$Address);
10239   %}
10240   ins_pipe(ialu_mem_imm);
10241 %}
10242 
// Computes dst = (index << scale) + disp for ints with one leal.
// Only selected when VM_Version::supports_fast_2op_lea() is true.
// The address has no base register (noreg); the shift amount is used
// directly as the Address::ScaleFactor. No flags effect is declared.
10243 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10244 %{
10245   predicate(VM_Version::supports_fast_2op_lea());
10246   match(Set dst (AddI (LShiftI index scale) disp));
10247 
10248   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10249   ins_encode %{
10250     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10251     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10252   %}
10253   ins_pipe(ialu_reg_reg);
10254 %}
10255 
// Computes dst = base + index + disp for ints with one leal, using an
// unscaled index (Address::times_1).
// Only selected when VM_Version::supports_fast_3op_lea() is true.
// No flags effect is declared.
10256 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10257 %{
10258   predicate(VM_Version::supports_fast_3op_lea());
10259   match(Set dst (AddI (AddI base index) disp));
10260 
10261   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10262   ins_encode %{
10263     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10264   %}
10265   ins_pipe(ialu_reg_reg);
10266 %}
10267 
// Computes dst = base + (index << scale) for ints with one leal and no
// displacement. Only selected when VM_Version::supports_fast_2op_lea()
// is true. No flags effect is declared.
// NOTE(review): base uses the no_rbp_r13_RegI class, presumably because
// rbp/r13 as a base would force a displacement into the encoding --
// confirm against the operand definition.
10268 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10269 %{
10270   predicate(VM_Version::supports_fast_2op_lea());
10271   match(Set dst (AddI base (LShiftI index scale)));
10272 
10273   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10274   ins_encode %{
10275     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10276     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10277   %}
10278   ins_pipe(ialu_reg_reg);
10279 %}
10280 
// Computes dst = base + (index << scale) + disp for ints with one leal.
// Only selected when VM_Version::supports_fast_3op_lea() is true.
// The shift amount is used directly as the Address::ScaleFactor.
// No flags effect is declared.
10281 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10282 %{
10283   predicate(VM_Version::supports_fast_3op_lea());
10284   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10285 
10286   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10287   ins_encode %{
10288     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10289     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10290   %}
10291   ins_pipe(ialu_reg_reg);
10292 %}
10293 
// Two-operand long add: dst = dst + src. Used only when UseAPX is off.
// Emits `addq reg, reg`, kills the flags register and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
10294 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10295 %{
10296   predicate(!UseAPX);
10297   match(Set dst (AddL dst src));
10298   effect(KILL cr);
10299   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10300 
10301   format %{ "addq    $dst, $src\t# long" %}
10302   ins_encode %{
10303     __ addq($dst$$Register, $src$$Register);
10304   %}
10305   ins_pipe(ialu_reg_reg);
10306 %}
10307 
// Three-operand long add: dst = src1 + src2. Only selected when UseAPX
// is set; emits eaddq dst, src1, src2. Kills the flags register and is
// tagged as setting the overflow, sign, zero, carry and parity flags.
// Both source operands are marked NDD-demotable (opr1 and opr2).
10308 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10309 %{
10310   predicate(UseAPX);
10311   match(Set dst (AddL src1 src2));
10312   effect(KILL cr);
10313   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10314 
10315   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10316   ins_encode %{
10317     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10318   %}
10319   ins_pipe(ialu_reg_reg);
10320 %}
10321 
// Two-operand long add of an immL32 immediate: dst = dst + src.
// Used only when UseAPX is off. Emits `addq reg, imm`, kills the flags
// register and is tagged as setting the overflow, sign, zero, carry and
// parity flags.
10322 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10323 %{
10324   predicate(!UseAPX);
10325   match(Set dst (AddL dst src));
10326   effect(KILL cr);
10327   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10328 
10329   format %{ "addq    $dst, $src\t# long" %}
10330   ins_encode %{
10331     __ addq($dst$$Register, $src$$constant);
10332   %}
10333   ins_pipe( ialu_reg );
10334 %}
10335 
// Three-operand long add of a register and an immL32 immediate:
// dst = src1 + src2. Only selected when UseAPX is set; emits
// eaddq dst, src1, imm. Kills the flags register and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
// Only the register source (opr1) is marked NDD-demotable.
10336 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10337 %{
10338   predicate(UseAPX);
10339   match(Set dst (AddL src1 src2));
10340   effect(KILL cr);
10341   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10342 
10343   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10344   ins_encode %{
10345     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10346   %}
10347   ins_pipe( ialu_reg );
10348 %}
10349 
// Long add of a loaded value and an immL32 immediate:
// dst = LoadL(src1) + src2. Only selected when UseAPX is set. A single
// eaddq with a memory source is emitted, so the load is folded into the
// add. Kills the flags register; tagged as setting the overflow, sign,
// zero, carry and parity flags.
10350 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10351 %{
10352   predicate(UseAPX);
10353   match(Set dst (AddL (LoadL src1) src2));
10354   effect(KILL cr);
10355   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10356 
10357   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10358   ins_encode %{
10359     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10360   %}
10361   ins_pipe( ialu_reg );
10362 %}
10363 
// Two-operand long add with a memory source: dst = dst + LoadL(src).
// Used only when UseAPX is off. Emits `addq reg, mem`, kills the flags
// register and is tagged as setting the overflow, sign, zero, carry and
// parity flags. The cost of 150 is marked XXX by the original author.
10364 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10365 %{
10366   predicate(!UseAPX);
10367   match(Set dst (AddL dst (LoadL src)));
10368   effect(KILL cr);
10369   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10370 
10371   ins_cost(150); // XXX
10372   format %{ "addq    $dst, $src\t# long" %}
10373   ins_encode %{
10374     __ addq($dst$$Register, $src$$Address);
10375   %}
10376   ins_pipe(ialu_reg_mem);
10377 %}
10378 
// Three-operand long add with a memory source:
// dst = src1 + LoadL(src2). Only selected when UseAPX is set; emits
// eaddq dst, src1, [src2]. Kills the flags register and is tagged as
// setting the overflow, sign, zero, carry and parity flags. It also
// carries both PD::Flag_ndd_demotable_opr1 and PD::Flag_ndd_demotable_opr2.
// NOTE(review): opr2 is a memory operand here -- confirm that marking it
// demotable is intended.
10379 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10380 %{
10381   predicate(UseAPX);
10382   match(Set dst (AddL src1 (LoadL src2)));
10383   effect(KILL cr);
10384   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10385 
10386   ins_cost(150);
10387   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10388   ins_encode %{
10389     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10390   %}
10391   ins_pipe(ialu_reg_mem);
10392 %}
10393 
// Read-modify-write long add to memory: [dst] = LoadL([dst]) + src.
// The load, add and store of the same address are matched as one
// `addq mem, reg`. No predicate. Kills the flags register; tagged as
// setting the overflow, sign, zero, carry and parity flags.
10394 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10395 %{
10396   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10397   effect(KILL cr);
10398   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10399 
10400   ins_cost(150); // XXX
10401   format %{ "addq    $dst, $src\t# long" %}
10402   ins_encode %{
10403     __ addq($dst$$Address, $src$$Register);
10404   %}
10405   ins_pipe(ialu_mem_reg);
10406 %}
10407 
// Read-modify-write long add of an immL32 immediate to memory:
// [dst] = LoadL([dst]) + src, emitted as one `addq mem, imm`.
// No predicate. Kills the flags register; tagged as setting the
// overflow, sign, zero, carry and parity flags.
10408 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10409 %{
10410   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10411   effect(KILL cr);
10412   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10413 
10414   ins_cost(125); // XXX
10415   format %{ "addq    $dst, $src\t# long" %}
10416   ins_encode %{
10417     __ addq($dst$$Address, $src$$constant);
10418   %}
10419   ins_pipe(ialu_mem_imm);
10420 %}
10421 
// In-place long increment: dst = dst + src, where src is an immL1 operand
// (presumably the constant 1; the operand is defined elsewhere).
// Selected only when UseAPX is off and UseIncDec is on. The encoding goes
// through incrementq(dst); the flags register is killed.
10422 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10423 %{
10424   predicate(!UseAPX && UseIncDec);
10425   match(Set dst (AddL dst src));
10426   effect(KILL cr);
10427 
10428   format %{ "incq    $dst\t# long" %}
10429   ins_encode %{
10430     __ incrementq($dst$$Register);
10431   %}
10432   ins_pipe(ialu_reg);
10433 %}
10434 
// Long increment into a separate destination: dst = src + val, where val
// is an immL1 operand. Requires both UseAPX and UseIncDec. Emits
// eincq dst, src and kills the flags register.
// src is the first input of AddL and feeds the 64-bit eincq, so it is
// declared with the long register class (rRegL), matching decL_rReg_ndd
// and the other long NDD rules.
// The node carries PD::Flag_ndd_demotable_opr1.
10435 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10436 %{
10437   predicate(UseAPX && UseIncDec);
10438   match(Set dst (AddL src val));
10439   effect(KILL cr);
10440   flag(PD::Flag_ndd_demotable_opr1);
10441 
10442   format %{ "eincq    $dst, $src\t# long ndd" %}
10443   ins_encode %{
10444     __ eincq($dst$$Register, $src$$Register, false);
10445   %}
10446   ins_pipe(ialu_reg);
10447 %}
10448 
// Long increment of a loaded value into a register:
// dst = LoadL(src) + val, where val is an immL1 operand.
// Requires both UseAPX and UseIncDec. The load is folded into a single
// eincq with a memory source; the flags register is killed.
10449 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10450 %{
10451   predicate(UseAPX && UseIncDec);
10452   match(Set dst (AddL (LoadL src) val));
10453   effect(KILL cr);
10454 
10455   format %{ "eincq    $dst, $src\t# long ndd" %}
10456   ins_encode %{
10457     __ eincq($dst$$Register, $src$$Address, false);
10458   %}
10459   ins_pipe(ialu_reg);
10460 %}
10461 
// Read-modify-write long increment of memory:
// [dst] = LoadL([dst]) + src, where src is an immL1 operand.
// Requires UseIncDec (independent of UseAPX). The encoding goes through
// incrementq on the address; the flags register is killed.
10462 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10463 %{
10464   predicate(UseIncDec);
10465   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10466   effect(KILL cr);
10467 
10468   ins_cost(125); // XXX
10469   format %{ "incq    $dst\t# long" %}
10470   ins_encode %{
10471     __ incrementq($dst$$Address);
10472   %}
10473   ins_pipe(ialu_mem_imm);
10474 %}
10475 
10476 // XXX why does that use AddL
// In-place long decrement, expressed as an add: dst = dst + src, where
// src is an immL_M1 operand (presumably the constant -1; defined
// elsewhere). Selected only when UseAPX is off and UseIncDec is on.
// The encoding goes through decrementq(dst); the flags register is killed.
10477 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10478 %{
10479   predicate(!UseAPX && UseIncDec);
10480   match(Set dst (AddL dst src));
10481   effect(KILL cr);
10482 
10483   format %{ "decq    $dst\t# long" %}
10484   ins_encode %{
10485     __ decrementq($dst$$Register);
10486   %}
10487   ins_pipe(ialu_reg);
10488 %}
10489 
// Long decrement into a separate destination: dst = src + val, where val
// is an immL_M1 operand. Requires both UseAPX and UseIncDec. Emits
// edecq dst, src and kills the flags register.
// The node carries PD::Flag_ndd_demotable_opr1.
10490 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10491 %{
10492   predicate(UseAPX && UseIncDec);
10493   match(Set dst (AddL src val));
10494   effect(KILL cr);
10495   flag(PD::Flag_ndd_demotable_opr1);
10496 
10497   format %{ "edecq    $dst, $src\t# long ndd" %}
10498   ins_encode %{
10499     __ edecq($dst$$Register, $src$$Register, false);
10500   %}
10501   ins_pipe(ialu_reg);
10502 %}
10503 
// Long decrement of a loaded value into a register:
// dst = LoadL(src) + val, where val is an immL_M1 operand.
// Requires both UseAPX and UseIncDec. The load is folded into a single
// edecq with a memory source; the flags register is killed.
10504 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10505 %{
10506   predicate(UseAPX && UseIncDec);
10507   match(Set dst (AddL (LoadL src) val));
10508   effect(KILL cr);
10509 
10510   format %{ "edecq    $dst, $src\t# long ndd" %}
10511   ins_encode %{
10512     __ edecq($dst$$Register, $src$$Address, false);
10513   %}
10514   ins_pipe(ialu_reg);
10515 %}
10516 
10517 // XXX why does that use AddL
// Read-modify-write long decrement of memory, expressed as an add:
// [dst] = LoadL([dst]) + src, where src is an immL_M1 operand.
// Requires UseIncDec (independent of UseAPX). The encoding goes through
// decrementq on the address; the flags register is killed.
10518 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10519 %{
10520   predicate(UseIncDec);
10521   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10522   effect(KILL cr);
10523 
10524   ins_cost(125); // XXX
10525   format %{ "decq    $dst\t# long" %}
10526   ins_encode %{
10527     __ decrementq($dst$$Address);
10528   %}
10529   ins_pipe(ialu_mem_imm);
10530 %}
10531 
// Computes dst = (index << scale) + disp for longs with one leaq.
// Only selected when VM_Version::supports_fast_2op_lea() is true.
// The address has no base register (noreg); the shift amount is used
// directly as the Address::ScaleFactor. No flags effect is declared.
10532 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10533 %{
10534   predicate(VM_Version::supports_fast_2op_lea());
10535   match(Set dst (AddL (LShiftL index scale) disp));
10536 
10537   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10538   ins_encode %{
10539     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10540     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10541   %}
10542   ins_pipe(ialu_reg_reg);
10543 %}
10544 
// Computes dst = base + index + disp for longs with one leaq, using an
// unscaled index (Address::times_1).
// Only selected when VM_Version::supports_fast_3op_lea() is true.
// No flags effect is declared.
10545 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10546 %{
10547   predicate(VM_Version::supports_fast_3op_lea());
10548   match(Set dst (AddL (AddL base index) disp));
10549 
10550   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10551   ins_encode %{
10552     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10553   %}
10554   ins_pipe(ialu_reg_reg);
10555 %}
10556 
// Computes dst = base + (index << scale) for longs with one leaq and no
// displacement. Only selected when VM_Version::supports_fast_2op_lea()
// is true. No flags effect is declared.
// NOTE(review): base uses the no_rbp_r13_RegL class, presumably because
// rbp/r13 as a base would force a displacement into the encoding --
// confirm against the operand definition.
10557 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10558 %{
10559   predicate(VM_Version::supports_fast_2op_lea());
10560   match(Set dst (AddL base (LShiftL index scale)));
10561 
10562   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10563   ins_encode %{
10564     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10565     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10566   %}
10567   ins_pipe(ialu_reg_reg);
10568 %}
10569 
// Computes dst = base + (index << scale) + disp for longs with one leaq.
// Only selected when VM_Version::supports_fast_3op_lea() is true.
// The shift amount is used directly as the Address::ScaleFactor.
// No flags effect is declared.
10570 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10571 %{
10572   predicate(VM_Version::supports_fast_3op_lea());
10573   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10574 
10575   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10576   ins_encode %{
10577     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10578     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10579   %}
10580   ins_pipe(ialu_reg_reg);
10581 %}
10582 
// Pointer plus long offset, in place: dst = AddP(dst, src) with the
// offset in a long register. Emits `addq reg, reg`. No predicate.
// Kills the flags register; tagged as setting the overflow, sign, zero,
// carry and parity flags.
10583 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10584 %{
10585   match(Set dst (AddP dst src));
10586   effect(KILL cr);
10587   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10588 
10589   format %{ "addq    $dst, $src\t# ptr" %}
10590   ins_encode %{
10591     __ addq($dst$$Register, $src$$Register);
10592   %}
10593   ins_pipe(ialu_reg_reg);
10594 %}
10595 
// Pointer plus immL32 offset, in place: dst = AddP(dst, src).
// Emits `addq reg, imm`. No predicate. Kills the flags register; tagged
// as setting the overflow, sign, zero, carry and parity flags.
10596 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10597 %{
10598   match(Set dst (AddP dst src));
10599   effect(KILL cr);
10600   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10601 
10602   format %{ "addq    $dst, $src\t# ptr" %}
10603   ins_encode %{
10604     __ addq($dst$$Register, $src$$constant);
10605   %}
10606   ins_pipe( ialu_reg );
10607 %}
10608 
10609 // XXX addP mem ops ????
10610 
// CheckCastPP on a pointer register. The node is matched in place
// (dst -> dst), declared with size 0 and has an empty encoding, so no
// machine code is emitted for it.
10611 instruct checkCastPP(rRegP dst)
10612 %{
10613   match(Set dst (CheckCastPP dst));
10614 
10615   size(0);
10616   format %{ "# checkcastPP of $dst" %}
10617   ins_encode(/* empty encoding */);
10618   ins_pipe(empty);
10619 %}
10620 
// CastPP on a pointer register. The node is matched in place
// (dst -> dst), declared with size 0 and has an empty encoding, so no
// machine code is emitted for it.
10621 instruct castPP(rRegP dst)
10622 %{
10623   match(Set dst (CastPP dst));
10624 
10625   size(0);
10626   format %{ "# castPP of $dst" %}
10627   ins_encode(/* empty encoding */);
10628   ins_pipe(empty);
10629 %}
10630 
// CastII on an int register when VerifyConstraintCasts is 0.
// Matched in place with size 0, cost 0 and an empty encoding, so no
// machine code is emitted. castII_checked covers VerifyConstraintCasts > 0.
10631 instruct castII(rRegI dst)
10632 %{
10633   predicate(VerifyConstraintCasts == 0);
10634   match(Set dst (CastII dst));
10635 
10636   size(0);
10637   format %{ "# castII of $dst" %}
10638   ins_encode(/* empty encoding */);
10639   ins_cost(0);
10640   ins_pipe(empty);
10641 %}
10642 
// CastII on an int register when VerifyConstraintCasts > 0.
// Instead of being a no-op, the encoding calls verify_int_in_range with
// this node's index, its int bottom type and the value in dst. The flags
// register is declared killed.
// NOTE(review): presumably the helper checks at run time that dst lies
// inside the cast's type range -- confirm in the macro assembler.
10643 instruct castII_checked(rRegI dst, rFlagsReg cr)
10644 %{
10645   predicate(VerifyConstraintCasts > 0);
10646   match(Set dst (CastII dst));
10647 
10648   effect(KILL cr);
10649   format %{ "# cast_checked_II $dst" %}
10650   ins_encode %{
10651     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10652   %}
10653   ins_pipe(pipe_slow);
10654 %}
10655 
// CastLL on a long register when VerifyConstraintCasts is 0.
// Matched in place with size 0, cost 0 and an empty encoding, so no
// machine code is emitted. The castLL_checked* rules cover
// VerifyConstraintCasts > 0.
10656 instruct castLL(rRegL dst)
10657 %{
10658   predicate(VerifyConstraintCasts == 0);
10659   match(Set dst (CastLL dst));
10660 
10661   size(0);
10662   format %{ "# castLL of $dst" %}
10663   ins_encode(/* empty encoding */);
10664   ins_cost(0);
10665   ins_pipe(empty);
10666 %}
10667 
// CastLL on a long register when VerifyConstraintCasts > 0 and
// castLL_is_imm32(n) holds. The encoding calls verify_long_in_range with
// this node's index, its long bottom type and dst; noreg is passed in
// place of a temporary register. The flags register is declared killed.
// NOTE(review): presumably castLL_is_imm32 means the range bounds fit in
// 32-bit immediates, which is why no temp is needed -- confirm.
10668 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10669 %{
10670   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10671   match(Set dst (CastLL dst));
10672 
10673   effect(KILL cr);
10674   format %{ "# cast_checked_LL $dst" %}
10675   ins_encode %{
10676     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10677   %}
10678   ins_pipe(pipe_slow);
10679 %}
10680 
// CastLL on a long register when VerifyConstraintCasts > 0 and
// castLL_is_imm32(n) does not hold. The encoding calls
// verify_long_in_range with this node's index, its long bottom type, dst
// and a scratch long register (tmp, declared TEMP). The flags register
// is declared killed.
10681 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10682 %{
10683   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10684   match(Set dst (CastLL dst));
10685 
10686   effect(KILL cr, TEMP tmp);
10687   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10688   ins_encode %{
10689     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10690   %}
10691   ins_pipe(pipe_slow);
10692 %}
10693 
// CastFF on a float register. Matched in place with size 0, cost 0 and
// an empty encoding, so no machine code is emitted.
10694 instruct castFF(regF dst)
10695 %{
10696   match(Set dst (CastFF dst));
10697 
10698   size(0);
10699   format %{ "# castFF of $dst" %}
10700   ins_encode(/* empty encoding */);
10701   ins_cost(0);
10702   ins_pipe(empty);
10703 %}
10704 
// CastHH on a value held in a regF operand. Matched in place with
// size 0, cost 0 and an empty encoding, so no machine code is emitted.
10705 instruct castHH(regF dst)
10706 %{
10707   match(Set dst (CastHH dst));
10708 
10709   size(0);
10710   format %{ "# castHH of $dst" %}
10711   ins_encode(/* empty encoding */);
10712   ins_cost(0);
10713   ins_pipe(empty);
10714 %}
10715 
// CastDD on a double register. Matched in place with size 0, cost 0 and
// an empty encoding, so no machine code is emitted.
10716 instruct castDD(regD dst)
10717 %{
10718   match(Set dst (CastDD dst));
10719 
10720   size(0);
10721   format %{ "# castDD of $dst" %}
10722   ins_encode(/* empty encoding */);
10723   ins_cost(0);
10724   ins_pipe(empty);
10725 %}
10726 
10727 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Boolean-result compare-and-swap of a pointer. Matches both
// CompareAndSwapP and WeakCompareAndSwapP, but only for nodes whose
// barrier_data() is 0. Emits `lock cmpxchgq newval, [mem_ptr]` with the
// expected value in the rax-class operand oldval, then materializes the
// "equal" condition into res via setcc. Both the flags register and
// oldval are declared killed.
10728 instruct compareAndSwapP(rRegI res,
10729                          memory mem_ptr,
10730                          rax_RegP oldval, rRegP newval,
10731                          rFlagsReg cr)
10732 %{
10733   predicate(n->as_LoadStore()->barrier_data() == 0);
10734   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10735   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10736   effect(KILL cr, KILL oldval);
10737 
10738   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10739             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10740             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10741   ins_encode %{
10742     __ lock();
10743     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10744     __ setcc(Assembler::equal, $res$$Register);
10745   %}
10746   ins_pipe( pipe_cmpxchg );
10747 %}
10748 
// Boolean-result compare-and-swap of a long. Matches both
// CompareAndSwapL and WeakCompareAndSwapL. Emits
// `lock cmpxchgq newval, [mem_ptr]` with the expected value in the
// rax-class operand oldval, then materializes the "equal" condition into
// res via setcc. Both the flags register and oldval are declared killed.
10749 instruct compareAndSwapL(rRegI res,
10750                          memory mem_ptr,
10751                          rax_RegL oldval, rRegL newval,
10752                          rFlagsReg cr)
10753 %{
10754   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10755   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10756   effect(KILL cr, KILL oldval);
10757 
10758   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10759             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10760             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10761   ins_encode %{
10762     __ lock();
10763     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10764     __ setcc(Assembler::equal, $res$$Register);
10765   %}
10766   ins_pipe( pipe_cmpxchg );
10767 %}
10768 
// Boolean-result compare-and-swap of an int. Matches both
// CompareAndSwapI and WeakCompareAndSwapI. Emits
// `lock cmpxchgl newval, [mem_ptr]` with the expected value in the
// rax-class operand oldval, then materializes the "equal" condition into
// res via setcc. Both the flags register and oldval are declared killed.
10769 instruct compareAndSwapI(rRegI res,
10770                          memory mem_ptr,
10771                          rax_RegI oldval, rRegI newval,
10772                          rFlagsReg cr)
10773 %{
10774   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10775   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10776   effect(KILL cr, KILL oldval);
10777 
10778   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10779             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10780             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10781   ins_encode %{
10782     __ lock();
10783     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10784     __ setcc(Assembler::equal, $res$$Register);
10785   %}
10786   ins_pipe( pipe_cmpxchg );
10787 %}
10788 
// Boolean-result compare-and-swap of a byte. Matches both
// CompareAndSwapB and WeakCompareAndSwapB. Emits
// `lock cmpxchgb newval, [mem_ptr]` with the expected value in the
// rax-class operand oldval, then materializes the "equal" condition into
// res via setcc. Both the flags register and oldval are declared killed.
10789 instruct compareAndSwapB(rRegI res,
10790                          memory mem_ptr,
10791                          rax_RegI oldval, rRegI newval,
10792                          rFlagsReg cr)
10793 %{
10794   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10795   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10796   effect(KILL cr, KILL oldval);
10797 
10798   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10799             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10800             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10801   ins_encode %{
10802     __ lock();
10803     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10804     __ setcc(Assembler::equal, $res$$Register);
10805   %}
10806   ins_pipe( pipe_cmpxchg );
10807 %}
10808 
// Boolean-result compare-and-swap of a short. Matches both
// CompareAndSwapS and WeakCompareAndSwapS. Emits
// `lock cmpxchgw newval, [mem_ptr]` with the expected value in the
// rax-class operand oldval, then materializes the "equal" condition into
// res via setcc. Both the flags register and oldval are declared killed.
10809 instruct compareAndSwapS(rRegI res,
10810                          memory mem_ptr,
10811                          rax_RegI oldval, rRegI newval,
10812                          rFlagsReg cr)
10813 %{
10814   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10815   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10816   effect(KILL cr, KILL oldval);
10817 
10818   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10819             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10820             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10821   ins_encode %{
10822     __ lock();
10823     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10824     __ setcc(Assembler::equal, $res$$Register);
10825   %}
10826   ins_pipe( pipe_cmpxchg );
10827 %}
10828 
// Boolean-result compare-and-swap of a narrow (rRegN) value. Matches
// both CompareAndSwapN and WeakCompareAndSwapN, but only for nodes whose
// barrier_data() is 0. Emits the 32-bit `lock cmpxchgl newval, [mem_ptr]`
// with the expected value in the rax-class operand oldval, then
// materializes the "equal" condition into res via setcc. Both the flags
// register and oldval are declared killed.
10829 instruct compareAndSwapN(rRegI res,
10830                           memory mem_ptr,
10831                           rax_RegN oldval, rRegN newval,
10832                           rFlagsReg cr) %{
10833   predicate(n->as_LoadStore()->barrier_data() == 0);
10834   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10835   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10836   effect(KILL cr, KILL oldval);
10837 
10838   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10839             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10840             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10841   ins_encode %{
10842     __ lock();
10843     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10844     __ setcc(Assembler::equal, $res$$Register);
10845   %}
10846   ins_pipe( pipe_cmpxchg );
10847 %}
10848 
// Value-returning compare-and-exchange of a byte. The result of
// CompareAndExchangeB is defined into oldval itself (a rax-class
// operand), so no separate result register or setcc is needed. Emits
// `lock cmpxchgb newval, [mem_ptr]`; the flags register is killed.
10849 instruct compareAndExchangeB(
10850                          memory mem_ptr,
10851                          rax_RegI oldval, rRegI newval,
10852                          rFlagsReg cr)
10853 %{
10854   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10855   effect(KILL cr);
10856 
10857   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10858             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10859   ins_encode %{
10860     __ lock();
10861     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10862   %}
10863   ins_pipe( pipe_cmpxchg );
10864 %}
10865 
// Value-returning compare-and-exchange of a short. The result of
// CompareAndExchangeS is defined into oldval itself (a rax-class
// operand), so no separate result register or setcc is needed. Emits
// `lock cmpxchgw newval, [mem_ptr]`; the flags register is killed.
10866 instruct compareAndExchangeS(
10867                          memory mem_ptr,
10868                          rax_RegI oldval, rRegI newval,
10869                          rFlagsReg cr)
10870 %{
10871   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10872   effect(KILL cr);
10873 
10874   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10875             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10876   ins_encode %{
10877     __ lock();
10878     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10879   %}
10880   ins_pipe( pipe_cmpxchg );
10881 %}
10882 
// Value-returning compare-and-exchange of an int. The result of
// CompareAndExchangeI is defined into oldval itself (a rax-class
// operand), so no separate result register or setcc is needed. Emits
// `lock cmpxchgl newval, [mem_ptr]`; the flags register is killed.
10883 instruct compareAndExchangeI(
10884                          memory mem_ptr,
10885                          rax_RegI oldval, rRegI newval,
10886                          rFlagsReg cr)
10887 %{
10888   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10889   effect(KILL cr);
10890 
10891   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10892             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10893   ins_encode %{
10894     __ lock();
10895     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10896   %}
10897   ins_pipe( pipe_cmpxchg );
10898 %}
10899 
// Value-returning compare-and-exchange of a long. The result of
// CompareAndExchangeL is defined into oldval itself (a rax-class
// operand), so no separate result register or setcc is needed. Emits
// `lock cmpxchgq newval, [mem_ptr]`; the flags register is killed.
10900 instruct compareAndExchangeL(
10901                          memory mem_ptr,
10902                          rax_RegL oldval, rRegL newval,
10903                          rFlagsReg cr)
10904 %{
10905   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10906   effect(KILL cr);
10907 
10908   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10909             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10910   ins_encode %{
10911     __ lock();
10912     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10913   %}
10914   ins_pipe( pipe_cmpxchg );
10915 %}
10916 
// Value-returning compare-and-exchange of a narrow (rRegN) value, only
// for nodes whose barrier_data() is 0. The result of CompareAndExchangeN
// is defined into oldval itself (a rax-class operand). Emits the 32-bit
// `lock cmpxchgl newval, [mem_ptr]`; the flags register is killed.
10917 instruct compareAndExchangeN(
10918                           memory mem_ptr,
10919                           rax_RegN oldval, rRegN newval,
10920                           rFlagsReg cr) %{
10921   predicate(n->as_LoadStore()->barrier_data() == 0);
10922   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10923   effect(KILL cr);
10924 
10925   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10926             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10927   ins_encode %{
10928     __ lock();
10929     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10930   %}
10931   ins_pipe( pipe_cmpxchg );
10932 %}
10933 
// Value-returning compare-and-exchange of a pointer, only for nodes
// whose barrier_data() is 0. The result of CompareAndExchangeP is
// defined into oldval itself (a rax-class operand). Emits
// `lock cmpxchgq newval, [mem_ptr]`; the flags register is killed.
10934 instruct compareAndExchangeP(
10935                          memory mem_ptr,
10936                          rax_RegP oldval, rRegP newval,
10937                          rFlagsReg cr)
10938 %{
10939   predicate(n->as_LoadStore()->barrier_data() == 0);
10940   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10941   effect(KILL cr);
10942 
10943   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10944             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10945   ins_encode %{
10946     __ lock();
10947     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10948   %}
10949   ins_pipe( pipe_cmpxchg );
10950 %}
10951 
// GetAndAddB with a register addend when the node's result is not used
// (result_not_used()). Because the old value is not needed, a plain
// `lock addb [mem], add` is emitted instead of xadd. The result is bound
// to a Universe dummy operand; the flags register is killed.
10952 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10953   predicate(n->as_LoadStore()->result_not_used());
10954   match(Set dummy (GetAndAddB mem add));
10955   effect(KILL cr);
10956   format %{ "addb_lock   $mem, $add" %}
10957   ins_encode %{
10958     __ lock();
10959     __ addb($mem$$Address, $add$$Register);
10960   %}
10961   ins_pipe(pipe_cmpxchg);
10962 %}
10963 
// GetAndAddB with an immediate addend when the node's result is not used
// (result_not_used()). Emits `lock addb [mem], imm` instead of xadd. The
// result is bound to a Universe dummy operand; the flags register is
// killed.
10964 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10965   predicate(n->as_LoadStore()->result_not_used());
10966   match(Set dummy (GetAndAddB mem add));
10967   effect(KILL cr);
10968   format %{ "addb_lock   $mem, $add" %}
10969   ins_encode %{
10970     __ lock();
10971     __ addb($mem$$Address, $add$$constant);
10972   %}
10973   ins_pipe(pipe_cmpxchg);
10974 %}
10975 
// GetAndAddB when the result is used. newval is both the addend and the
// result register: `lock xaddb [mem], newval` is emitted, followed by
// narrow_subword_type(newval, T_BYTE). The flags register is killed.
// NOTE(review): the narrowing call presumably re-normalizes the returned
// value to a Java byte held in an int register -- confirm in the macro
// assembler.
10976 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10977   predicate(!n->as_LoadStore()->result_not_used());
10978   match(Set newval (GetAndAddB mem newval));
10979   effect(KILL cr);
10980   format %{ "xaddb_lock  $mem, $newval\t# $newval -> byte" %}
10981   ins_encode %{
10982     __ lock();
10983     __ xaddb($mem$$Address, $newval$$Register);
10984     __ narrow_subword_type($newval$$Register, T_BYTE);
10985   %}
10986   ins_pipe(pipe_cmpxchg);
10987 %}
10988 
// GetAndAddS with a register addend when the node's result is not used
// (result_not_used()). Emits `lock addw [mem], add` instead of xadd. The
// result is bound to a Universe dummy operand; the flags register is
// killed.
10989 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10990   predicate(n->as_LoadStore()->result_not_used());
10991   match(Set dummy (GetAndAddS mem add));
10992   effect(KILL cr);
10993   format %{ "addw_lock   $mem, $add" %}
10994   ins_encode %{
10995     __ lock();
10996     __ addw($mem$$Address, $add$$Register);
10997   %}
10998   ins_pipe(pipe_cmpxchg);
10999 %}
11000 
// GetAndAddS with an immediate addend when the node's result is not
// used. Unlike the byte and int immediate variants, this rule is also
// gated on UseStoreImmI16. Emits `lock addw [mem], imm`. The result is
// bound to a Universe dummy operand; the flags register is killed.
11001 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11002   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
11003   match(Set dummy (GetAndAddS mem add));
11004   effect(KILL cr);
11005   format %{ "addw_lock   $mem, $add" %}
11006   ins_encode %{
11007     __ lock();
11008     __ addw($mem$$Address, $add$$constant);
11009   %}
11010   ins_pipe(pipe_cmpxchg);
11011 %}
11012 
// Atomic short fetch-and-add for the case where the GetAndAddS result is used.
// newval is both the addend and the result register: a locked xaddw is emitted
// on it.
// NOTE(review): narrow_subword_type(reg, T_SHORT) presumably re-normalizes the
// returned value to short range, as the "-> short" format note suggests -
// confirm against the macro assembler.
11013 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
11014   predicate(!n->as_LoadStore()->result_not_used());
11015   match(Set newval (GetAndAddS mem newval));
11016   effect(KILL cr);
11017   format %{ "xaddw_lock  $mem, $newval\t# $newval -> short" %}
11018   ins_encode %{
11019     __ lock();
11020     __ xaddw($mem$$Address, $newval$$Register);
11021     __ narrow_subword_type($newval$$Register, T_SHORT);
11022   %}
11023   ins_pipe(pipe_cmpxchg);
11024 %}
11025 
// Atomic int add with a register addend. Selected only when the GetAndAddI
// result is not consumed (see predicate); emits lock + addl on the memory
// operand and declares the flags register killed.
11026 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
11027   predicate(n->as_LoadStore()->result_not_used());
11028   match(Set dummy (GetAndAddI mem add));
11029   effect(KILL cr);
11030   format %{ "addl_lock   $mem, $add" %}
11031   ins_encode %{
11032     __ lock();
11033     __ addl($mem$$Address, $add$$Register);
11034   %}
11035   ins_pipe(pipe_cmpxchg);
11036 %}
11037 
// Atomic int add with an immediate addend. Selected only when the GetAndAddI
// result is not consumed (see predicate); emits lock + addl mem, imm and
// declares the flags register killed.
11038 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11039   predicate(n->as_LoadStore()->result_not_used());
11040   match(Set dummy (GetAndAddI mem add));
11041   effect(KILL cr);
11042   format %{ "addl_lock   $mem, $add" %}
11043   ins_encode %{
11044     __ lock();
11045     __ addl($mem$$Address, $add$$constant);
11046   %}
11047   ins_pipe(pipe_cmpxchg);
11048 %}
11049 
// Atomic int fetch-and-add for the case where the GetAndAddI result is used.
// newval is both the addend and the result register: a locked xaddl is
// emitted on it. The flags register is declared killed.
11050 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
11051   predicate(!n->as_LoadStore()->result_not_used());
11052   match(Set newval (GetAndAddI mem newval));
11053   effect(KILL cr);
11054   format %{ "xaddl_lock  $mem, $newval" %}
11055   ins_encode %{
11056     __ lock();
11057     __ xaddl($mem$$Address, $newval$$Register);
11058   %}
11059   ins_pipe(pipe_cmpxchg);
11060 %}
11061 
// Atomic long add with a register addend. Selected only when the GetAndAddL
// result is not consumed (see predicate); emits lock + addq on the memory
// operand and declares the flags register killed.
11062 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11063   predicate(n->as_LoadStore()->result_not_used());
11064   match(Set dummy (GetAndAddL mem add));
11065   effect(KILL cr);
11066   format %{ "addq_lock   $mem, $add" %}
11067   ins_encode %{
11068     __ lock();
11069     __ addq($mem$$Address, $add$$Register);
11070   %}
11071   ins_pipe(pipe_cmpxchg);
11072 %}
11073 
// Atomic long add with an immL32 immediate addend. Selected only when the
// GetAndAddL result is not consumed (see predicate); emits lock + addq
// mem, imm and declares the flags register killed.
11074 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11075   predicate(n->as_LoadStore()->result_not_used());
11076   match(Set dummy (GetAndAddL mem add));
11077   effect(KILL cr);
11078   format %{ "addq_lock   $mem, $add" %}
11079   ins_encode %{
11080     __ lock();
11081     __ addq($mem$$Address, $add$$constant);
11082   %}
11083   ins_pipe(pipe_cmpxchg);
11084 %}
11085 
// Atomic long fetch-and-add for the case where the GetAndAddL result is used.
// newval is both the addend and the result register: a locked xaddq is
// emitted on it. The flags register is declared killed.
11086 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11087   predicate(!n->as_LoadStore()->result_not_used());
11088   match(Set newval (GetAndAddL mem newval));
11089   effect(KILL cr);
11090   format %{ "xaddq_lock  $mem, $newval" %}
11091   ins_encode %{
11092     __ lock();
11093     __ xaddq($mem$$Address, $newval$$Register);
11094   %}
11095   ins_pipe(pipe_cmpxchg);
11096 %}
11097 
// Byte exchange (GetAndSetB): newval is swapped with the memory operand via
// xchgb and then holds the result. No lock() call and no flags effect are
// declared by this rule.
// NOTE(review): narrow_subword_type(reg, T_BYTE) presumably re-normalizes the
// returned value to byte range, as the "-> byte" format note suggests - confirm.
11098 instruct xchgB( memory mem, rRegI newval) %{
11099   match(Set newval (GetAndSetB mem newval));
11100   format %{ "XCHGB  $newval,[$mem]\t# $newval -> byte" %}
11101   ins_encode %{
11102     __ xchgb($newval$$Register, $mem$$Address);
11103     __ narrow_subword_type($newval$$Register, T_BYTE);
11104   %}
11105   ins_pipe( pipe_cmpxchg );
11106 %}
11107 
// Short exchange (GetAndSetS): newval is swapped with the memory operand via
// xchgw and then holds the result. No lock() call and no flags effect are
// declared by this rule.
// NOTE(review): narrow_subword_type(reg, T_SHORT) presumably re-normalizes the
// returned value to short range, as the "-> short" format note suggests - confirm.
11108 instruct xchgS( memory mem, rRegI newval) %{
11109   match(Set newval (GetAndSetS mem newval));
11110   format %{ "XCHGW  $newval,[$mem]\t# $newval -> short" %}
11111   ins_encode %{
11112     __ xchgw($newval$$Register, $mem$$Address);
11113     __ narrow_subword_type($newval$$Register, T_SHORT);
11114   %}
11115   ins_pipe( pipe_cmpxchg );
11116 %}
11117 
// Int exchange (GetAndSetI): newval is swapped with the memory operand via
// xchgl and then holds the result. No lock() call and no flags effect are
// declared by this rule.
11118 instruct xchgI( memory mem, rRegI newval) %{
11119   match(Set newval (GetAndSetI mem newval));
11120   format %{ "XCHGL  $newval,[$mem]" %}
11121   ins_encode %{
11122     __ xchgl($newval$$Register, $mem$$Address);
11123   %}
11124   ins_pipe( pipe_cmpxchg );
11125 %}
11126 
// Long exchange (GetAndSetL): newval is swapped with the memory operand via
// the 64-bit xchgq and then holds the result. The format string names the
// 64-bit mnemonic so the printed assembly matches the emitted instruction.
// No lock() call and no flags effect are declared by this rule.
11127 instruct xchgL( memory mem, rRegL newval) %{
11128   match(Set newval (GetAndSetL mem newval));
11129   format %{ "XCHGQ  $newval,[$mem]" %}
11130   ins_encode %{
11131     __ xchgq($newval$$Register, $mem$$Address);
11132   %}
11133   ins_pipe( pipe_cmpxchg );
11134 %}
11135 
// Pointer exchange (GetAndSetP), selected only when the node carries no
// barrier data (barrier_data() == 0, see predicate). newval is swapped with
// the memory operand via xchgq and then holds the result.
11136 instruct xchgP( memory mem, rRegP newval) %{
11137   match(Set newval (GetAndSetP mem newval));
11138   predicate(n->as_LoadStore()->barrier_data() == 0);
11139   format %{ "XCHGQ  $newval,[$mem]" %}
11140   ins_encode %{
11141     __ xchgq($newval$$Register, $mem$$Address);
11142   %}
11143   ins_pipe( pipe_cmpxchg );
11144 %}
11145 
// Narrow-pointer exchange (GetAndSetN), selected only when the node carries
// no barrier data (barrier_data() == 0, see predicate). newval is swapped
// with the memory operand via the 32-bit xchgl and then holds the result.
11146 instruct xchgN( memory mem, rRegN newval) %{
11147   predicate(n->as_LoadStore()->barrier_data() == 0);
11148   match(Set newval (GetAndSetN mem newval));
11149   format %{ "XCHGL  $newval,[$mem]" %}
11150   ins_encode %{
11151     __ xchgl($newval$$Register, $mem$$Address);
11152   %}
11153   ins_pipe( pipe_cmpxchg );
11154 %}
11155 
11156 //----------Abs Instructions-------------------------------------------
11157 
11158 // Integer Absolute Instructions
// Branch-free int absolute value: dst = 0 - src, then, if that result is
// negative ("less" condition after the subtract), dst is replaced by src via
// cmov. dst is a TEMP because it is written before src is last read, so the
// two must not share a register; the flags register is killed.
11159 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11160 %{
11161   match(Set dst (AbsI src));
11162   effect(TEMP dst, KILL cr);
11163   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11164             "subl    $dst, $src\n\t"
11165             "cmovll  $dst, $src" %}
11166   ins_encode %{
11167     __ xorl($dst$$Register, $dst$$Register);
11168     __ subl($dst$$Register, $src$$Register);
11169     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11170   %}
11171 
11172   ins_pipe(ialu_reg_reg);
11173 %}
11174 
11175 // Long Absolute Instructions
// Branch-free long absolute value: dst is zeroed with a 32-bit xorl, then
// dst = 0 - src (subq), then, if that result is negative, dst is replaced by
// src via cmovq. dst is a TEMP because it is written before src is last read;
// the flags register is killed.
11176 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11177 %{
11178   match(Set dst (AbsL src));
11179   effect(TEMP dst, KILL cr);
11180   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11181             "subq    $dst, $src\n\t"
11182             "cmovlq  $dst, $src" %}
11183   ins_encode %{
11184     __ xorl($dst$$Register, $dst$$Register);
11185     __ subq($dst$$Register, $src$$Register);
11186     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11187   %}
11188 
11189   ins_pipe(ialu_reg_reg);
11190 %}
11191 
11192 //----------Subtraction Instructions-------------------------------------------
11193 
11194 // Integer Subtraction Instructions
// Two-operand int subtract, dst -= src, used when APX is disabled (predicate).
// The flag(...) clause records the overflow, sign, zero, carry and parity
// flags as set by this instruction; cr is declared killed.
11195 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11196 %{
11197   predicate(!UseAPX);
11198   match(Set dst (SubI dst src));
11199   effect(KILL cr);
11200   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11201 
11202   format %{ "subl    $dst, $src\t# int" %}
11203   ins_encode %{
11204     __ subl($dst$$Register, $src$$Register);
11205   %}
11206   ins_pipe(ialu_reg_reg);
11207 %}
11208 
// Three-operand ("ndd") int subtract, dst = src1 - src2, used when APX is
// enabled (predicate). Emits esubl with a separate destination register.
// NOTE(review): Flag_ndd_demotable_opr1 presumably allows falling back to the
// two-operand form when dst and src1 share a register, and the trailing
// `false` argument is presumably the no-flags selector - confirm both against
// their definitions.
11209 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11210 %{
11211   predicate(UseAPX);
11212   match(Set dst (SubI src1 src2));
11213   effect(KILL cr);
11214   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11215 
11216   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11217   ins_encode %{
11218     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11219   %}
11220   ins_pipe(ialu_reg_reg);
11221 %}
11222 
// Three-operand ("ndd") int subtract of an immediate, dst = src1 - imm, used
// when APX is enabled (predicate). Emits esubl with a register source and a
// constant.
// NOTE(review): Flag_ndd_demotable_opr1 presumably permits demotion to the
// two-operand form when dst and src1 share a register - confirm.
11223 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11224 %{
11225   predicate(UseAPX);
11226   match(Set dst (SubI src1 src2));
11227   effect(KILL cr);
11228   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11229 
11230   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11231   ins_encode %{
11232     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11233   %}
11234   ins_pipe(ialu_reg_reg);
11235 %}
11236 
// Three-operand ("ndd") int subtract with a memory minuend and an immediate
// subtrahend: dst = LoadI(src1) - imm, used when APX is enabled (predicate).
// The load is folded into esubl. No ndd-demotable flag is listed here, unlike
// the register-source forms.
11237 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11238 %{
11239   predicate(UseAPX);
11240   match(Set dst (SubI (LoadI src1) src2));
11241   effect(KILL cr);
11242   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11243 
11244   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11245   ins_encode %{
11246     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11247   %}
11248   ins_pipe(ialu_reg_reg);
11249 %}
11250 
// Two-operand int subtract with a folded load, dst -= LoadI(src), used when
// APX is disabled (predicate). Costed at 150 and scheduled on the reg/mem
// ALU pipe; cr is declared killed.
11251 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11252 %{
11253   predicate(!UseAPX);
11254   match(Set dst (SubI dst (LoadI src)));
11255   effect(KILL cr);
11256   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11257 
11258   ins_cost(150);
11259   format %{ "subl    $dst, $src\t# int" %}
11260   ins_encode %{
11261     __ subl($dst$$Register, $src$$Address);
11262   %}
11263   ins_pipe(ialu_reg_mem);
11264 %}
11265 
// Three-operand ("ndd") int subtract with a folded load of the subtrahend:
// dst = src1 - LoadI(src2), used when APX is enabled (predicate).
// NOTE(review): Flag_ndd_demotable_opr1 presumably permits demotion to the
// two-operand form when dst and src1 share a register - confirm.
11266 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11267 %{
11268   predicate(UseAPX);
11269   match(Set dst (SubI src1 (LoadI src2)));
11270   effect(KILL cr);
11271   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11272 
11273   ins_cost(150);
11274   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11275   ins_encode %{
11276     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11277   %}
11278   ins_pipe(ialu_reg_mem);
11279 %}
11280 
// Three-operand ("ndd") int subtract with a folded load of the minuend:
// dst = LoadI(src1) - src2, used when APX is enabled (predicate). No
// ndd-demotable flag is listed for this memory-first form.
11281 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11282 %{
11283   predicate(UseAPX);
11284   match(Set dst (SubI (LoadI src1) src2));
11285   effect(KILL cr);
11286   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11287 
11288   ins_cost(150);
11289   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11290   ins_encode %{
11291     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11292   %}
11293   ins_pipe(ialu_reg_mem);
11294 %}
11295 
// Read-modify-write int subtract on memory: matches a StoreI of
// (LoadI dst) - src back to the same address and emits a single
// subl mem, reg. Has no UseAPX predicate, so it applies in both modes.
11296 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11297 %{
11298   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11299   effect(KILL cr);
11300   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11301 
11302   ins_cost(150);
11303   format %{ "subl    $dst, $src\t# int" %}
11304   ins_encode %{
11305     __ subl($dst$$Address, $src$$Register);
11306   %}
11307   ins_pipe(ialu_mem_reg);
11308 %}
11309 
// Two-operand long subtract, dst -= src, used when APX is disabled
// (predicate). The flag(...) clause records the overflow, sign, zero, carry
// and parity flags as set; cr is declared killed.
11310 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11311 %{
11312   predicate(!UseAPX);
11313   match(Set dst (SubL dst src));
11314   effect(KILL cr);
11315   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11316 
11317   format %{ "subq    $dst, $src\t# long" %}
11318   ins_encode %{
11319     __ subq($dst$$Register, $src$$Register);
11320   %}
11321   ins_pipe(ialu_reg_reg);
11322 %}
11323 
// Three-operand ("ndd") long subtract, dst = src1 - src2, used when APX is
// enabled (predicate). Emits esubq with a separate destination register.
// NOTE(review): Flag_ndd_demotable_opr1 presumably permits demotion to the
// two-operand form when dst and src1 share a register, and the trailing
// `false` is presumably the no-flags selector - confirm.
11324 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11325 %{
11326   predicate(UseAPX);
11327   match(Set dst (SubL src1 src2));
11328   effect(KILL cr);
11329   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11330 
11331   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11332   ins_encode %{
11333     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11334   %}
11335   ins_pipe(ialu_reg_reg);
11336 %}
11337 
// Three-operand ("ndd") long subtract of an immL32 immediate,
// dst = src1 - imm, used when APX is enabled (predicate).
// NOTE(review): Flag_ndd_demotable_opr1 presumably permits demotion to the
// two-operand form when dst and src1 share a register - confirm.
11338 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11339 %{
11340   predicate(UseAPX);
11341   match(Set dst (SubL src1 src2));
11342   effect(KILL cr);
11343   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11344 
11345   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11346   ins_encode %{
11347     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11348   %}
11349   ins_pipe(ialu_reg_reg);
11350 %}
11351 
// Three-operand ("ndd") long subtract with a memory minuend and an immL32
// subtrahend: dst = LoadL(src1) - imm, used when APX is enabled (predicate).
// The load is folded into esubq; no ndd-demotable flag is listed.
11352 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11353 %{
11354   predicate(UseAPX);
11355   match(Set dst (SubL (LoadL src1) src2));
11356   effect(KILL cr);
11357   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11358 
11359   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11360   ins_encode %{
11361     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11362   %}
11363   ins_pipe(ialu_reg_reg);
11364 %}
11365 
// Two-operand long subtract with a folded load, dst -= LoadL(src), used when
// APX is disabled (predicate). Costed at 150 and scheduled on the reg/mem
// ALU pipe; cr is declared killed.
11366 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11367 %{
11368   predicate(!UseAPX);
11369   match(Set dst (SubL dst (LoadL src)));
11370   effect(KILL cr);
11371   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11372 
11373   ins_cost(150);
11374   format %{ "subq    $dst, $src\t# long" %}
11375   ins_encode %{
11376     __ subq($dst$$Register, $src$$Address);
11377   %}
11378   ins_pipe(ialu_reg_mem);
11379 %}
11380 
// Three-operand ("ndd") long subtract with a folded load of the subtrahend:
// dst = src1 - LoadL(src2), used when APX is enabled (predicate).
// NOTE(review): Flag_ndd_demotable_opr1 presumably permits demotion to the
// two-operand form when dst and src1 share a register - confirm.
11381 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11382 %{
11383   predicate(UseAPX);
11384   match(Set dst (SubL src1 (LoadL src2)));
11385   effect(KILL cr);
11386   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11387 
11388   ins_cost(150);
11389   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11390   ins_encode %{
11391     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11392   %}
11393   ins_pipe(ialu_reg_mem);
11394 %}
11395 
// Three-operand ("ndd") long subtract with a folded load of the minuend:
// dst = LoadL(src1) - src2, used when APX is enabled (predicate). No
// ndd-demotable flag is listed for this memory-first form.
11396 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11397 %{
11398   predicate(UseAPX);
11399   match(Set dst (SubL (LoadL src1) src2));
11400   effect(KILL cr);
11401   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11402 
11403   ins_cost(150);
11404   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11405   ins_encode %{
11406     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11407   %}
11408   ins_pipe(ialu_reg_mem);
11409 %}
11410 
// Read-modify-write long subtract on memory: matches a StoreL of
// (LoadL dst) - src back to the same address and emits a single
// subq mem, reg. Has no UseAPX predicate, so it applies in both modes.
11411 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11412 %{
11413   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11414   effect(KILL cr);
11415   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11416 
11417   ins_cost(150);
11418   format %{ "subq    $dst, $src\t# long" %}
11419   ins_encode %{
11420     __ subq($dst$$Address, $src$$Register);
11421   %}
11422   ins_pipe(ialu_mem_reg);
11423 %}
11424 
11425 // Subtract from a pointer
11426 // XXX hmpf???
// Matches a pointer plus a negated int, AddP dst (0 - src), and emits a
// single 64-bit subq dst, src instead of a negate followed by an add.
// NOTE(review): src is an rRegI operand but is used by a 64-bit subq; whether
// its upper 32 bits are guaranteed to be sign-extended is not visible here -
// confirm before relying on this rule.
11427 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11428 %{
11429   match(Set dst (AddP dst (SubI zero src)));
11430   effect(KILL cr);
11431 
11432   format %{ "subq    $dst, $src\t# ptr - int" %}
11433   ins_encode %{
11434     __ subq($dst$$Register, $src$$Register);
11435   %}
11436   ins_pipe(ialu_reg_reg);
11437 %}
11438 
// In-place int negate expressed as 0 - dst (SubI zero dst), used when APX is
// disabled (predicate). Emits negl on dst. The flag(...) clause lists
// overflow, sign, zero and parity; carry is not listed for negate.
11439 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11440 %{
11441   predicate(!UseAPX);
11442   match(Set dst (SubI zero dst));
11443   effect(KILL cr);
11444   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11445 
11446   format %{ "negl    $dst\t# int" %}
11447   ins_encode %{
11448     __ negl($dst$$Register);
11449   %}
11450   ins_pipe(ialu_reg);
11451 %}
11452 
// Two-register ("ndd") int negate expressed as 0 - src (SubI zero src), used
// when APX is enabled (predicate). Emits enegl dst, src.
// NOTE(review): this rule uses Flag_ndd_demotable_opr2, presumably because
// src is the second input of the matched SubI (the first is the zero
// constant) - confirm against the flag's definition.
11453 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11454 %{
11455   predicate(UseAPX);
11456   match(Set dst (SubI zero src));
11457   effect(KILL cr);
11458   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11459 
11460   format %{ "enegl    $dst, $src\t# int ndd" %}
11461   ins_encode %{
11462     __ enegl($dst$$Register, $src$$Register, false);
11463   %}
11464   ins_pipe(ialu_reg);
11465 %}
11466 
// In-place int negate for an explicit NegI node (as opposed to the SubI-from-
// zero form), used when APX is disabled (predicate). Emits negl on dst.
11467 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11468 %{
11469   predicate(!UseAPX);
11470   match(Set dst (NegI dst));
11471   effect(KILL cr);
11472   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11473 
11474   format %{ "negl    $dst\t# int" %}
11475   ins_encode %{
11476     __ negl($dst$$Register);
11477   %}
11478   ins_pipe(ialu_reg);
11479 %}
11480 
// Two-register ("ndd") int negate for an explicit NegI node, used when APX is
// enabled (predicate). Emits enegl dst, src.
// NOTE(review): Flag_ndd_demotable_opr1 presumably permits demotion to the
// in-place form when dst and src share a register - confirm.
11481 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11482 %{
11483   predicate(UseAPX);
11484   match(Set dst (NegI src));
11485   effect(KILL cr);
11486   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11487 
11488   format %{ "enegl    $dst, $src\t# int ndd" %}
11489   ins_encode %{
11490     __ enegl($dst$$Register, $src$$Register, false);
11491   %}
11492   ins_pipe(ialu_reg);
11493 %}
11494 
// Read-modify-write int negate on memory: matches a StoreI of
// 0 - (LoadI dst) back to the same address and emits a single negl on the
// memory operand. Has no UseAPX predicate.
11495 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11496 %{
11497   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11498   effect(KILL cr);
11499   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11500 
11501   format %{ "negl    $dst\t# int" %}
11502   ins_encode %{
11503     __ negl($dst$$Address);
11504   %}
11505   ins_pipe(ialu_reg);
11506 %}
11507 
// In-place long negate expressed as 0 - dst (SubL zero dst), used when APX is
// disabled (predicate). Emits negq on dst. The flag(...) clause lists
// overflow, sign, zero and parity; carry is not listed for negate.
11508 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11509 %{
11510   predicate(!UseAPX);
11511   match(Set dst (SubL zero dst));
11512   effect(KILL cr);
11513   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11514 
11515   format %{ "negq    $dst\t# long" %}
11516   ins_encode %{
11517     __ negq($dst$$Register);
11518   %}
11519   ins_pipe(ialu_reg);
11520 %}
11521 
// Two-register ("ndd") long negate expressed as 0 - src (SubL zero src), used
// when APX is enabled (predicate). Emits enegq dst, src.
// NOTE(review): this rule uses Flag_ndd_demotable_opr2, presumably because
// src is the second input of the matched SubL (the first is the zero
// constant) - confirm against the flag's definition.
11522 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11523 %{
11524   predicate(UseAPX);
11525   match(Set dst (SubL zero src));
11526   effect(KILL cr);
11527   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11528 
11529   format %{ "enegq    $dst, $src\t# long ndd" %}
11530   ins_encode %{
11531     __ enegq($dst$$Register, $src$$Register, false);
11532   %}
11533   ins_pipe(ialu_reg);
11534 %}
11535 
// In-place long negate for an explicit NegL node (as opposed to the SubL-from-
// zero form), used when APX is disabled (predicate). Emits the 64-bit negq on
// dst; the format annotation is "# long" to match the operand width.
11536 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11537 %{
11538   predicate(!UseAPX);
11539   match(Set dst (NegL dst));
11540   effect(KILL cr);
11541   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11542 
11543   format %{ "negq    $dst\t# long" %}
11544   ins_encode %{
11545     __ negq($dst$$Register);
11546   %}
11547   ins_pipe(ialu_reg);
11548 %}
11549 
// Two-register ("ndd") long negate for an explicit NegL node, used when APX is
// enabled (predicate). Emits enegq dst, src.
// NOTE(review): Flag_ndd_demotable_opr1 presumably permits demotion to the
// in-place form when dst and src share a register - confirm.
11550 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11551 %{
11552   predicate(UseAPX);
11553   match(Set dst (NegL src));
11554   effect(KILL cr);
11555   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11556 
11557   format %{ "enegq    $dst, $src\t# long ndd" %}
11558   ins_encode %{
11559     __ enegq($dst$$Register, $src$$Register, false);
11560   %}
11561   ins_pipe(ialu_reg);
11562 %}
11563 
// Read-modify-write long negate on memory: matches a StoreL of
// 0 - (LoadL dst) back to the same address and emits a single negq on the
// memory operand. Has no UseAPX predicate.
11564 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11565 %{
11566   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11567   effect(KILL cr);
11568   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11569 
11570   format %{ "negq    $dst\t# long" %}
11571   ins_encode %{
11572     __ negq($dst$$Address);
11573   %}
11574   ins_pipe(ialu_reg);
11575 %}
11576 
11577 //----------Multiplication/Division Instructions-------------------------------
11578 // Integer Multiplication Instructions
11579 // Multiply Register
11580 
// Two-operand int multiply, dst *= src, used when APX is disabled
// (predicate). Emits imull reg, reg at cost 300; cr is declared killed and no
// flag(...) clause is given.
11581 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11582 %{
11583   predicate(!UseAPX);
11584   match(Set dst (MulI dst src));
11585   effect(KILL cr);
11586 
11587   ins_cost(300);
11588   format %{ "imull   $dst, $src\t# int" %}
11589   ins_encode %{
11590     __ imull($dst$$Register, $src$$Register);
11591   %}
11592   ins_pipe(ialu_reg_reg_alu0);
11593 %}
11594 
// Three-operand ("ndd") int multiply, dst = src1 * src2, used when APX is
// enabled (predicate). Emits eimull with a separate destination.
// NOTE(review): both Flag_ndd_demotable_opr1 and _opr2 are listed, presumably
// because multiplication is commutative so either source may coincide with
// dst for demotion - confirm against the flag definitions.
11595 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11596 %{
11597   predicate(UseAPX);
11598   match(Set dst (MulI src1 src2));
11599   effect(KILL cr);
11600   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11601 
11602   ins_cost(300);
11603   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11604   ins_encode %{
11605     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11606   %}
11607   ins_pipe(ialu_reg_reg_alu0);
11608 %}
11609 
// Int multiply by an immediate, dst = src * imm, using the three-operand
// imull reg, reg, imm form. Has no UseAPX predicate, so it applies in both
// modes; cr is declared killed.
11610 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11611 %{
11612   match(Set dst (MulI src imm));
11613   effect(KILL cr);
11614 
11615   ins_cost(300);
11616   format %{ "imull   $dst, $src, $imm\t# int" %}
11617   ins_encode %{
11618     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11619   %}
11620   ins_pipe(ialu_reg_reg_alu0);
11621 %}
11622 
// Two-operand int multiply with a folded load, dst *= LoadI(src), used when
// APX is disabled (predicate). Emits imull reg, mem at cost 350.
11623 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11624 %{
11625   predicate(!UseAPX);
11626   match(Set dst (MulI dst (LoadI src)));
11627   effect(KILL cr);
11628 
11629   ins_cost(350);
11630   format %{ "imull   $dst, $src\t# int" %}
11631   ins_encode %{
11632     __ imull($dst$$Register, $src$$Address);
11633   %}
11634   ins_pipe(ialu_reg_mem_alu0);
11635 %}
11636 
// Three-operand ("ndd") int multiply with a folded load of the second factor:
// dst = src1 * LoadI(src2), used when APX is enabled (predicate). Emits
// eimull reg, reg, mem at cost 350.
// NOTE(review): Flag_ndd_demotable_opr2 is listed although the second input
// is a memory operand; what demotion means for it is not visible here -
// confirm against the flag's definition.
11637 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11638 %{
11639   predicate(UseAPX);
11640   match(Set dst (MulI src1 (LoadI src2)));
11641   effect(KILL cr);
11642   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11643 
11644   ins_cost(350);
11645   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11646   ins_encode %{
11647     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11648   %}
11649   ins_pipe(ialu_reg_mem_alu0);
11650 %}
11651 
// Int multiply of a loaded value by an immediate, dst = LoadI(src) * imm,
// using the three-operand imull reg, mem, imm form. Has no UseAPX predicate.
// Costed at 300, the same as the register form.
11652 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11653 %{
11654   match(Set dst (MulI (LoadI src) imm));
11655   effect(KILL cr);
11656 
11657   ins_cost(300);
11658   format %{ "imull   $dst, $src, $imm\t# int" %}
11659   ins_encode %{
11660     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11661   %}
11662   ins_pipe(ialu_reg_mem_alu0);
11663 %}
11664 
// MulAddS2I (dst*src1 + src2*src3) implemented by expansion into existing
// rules rather than its own encoding: two mulI_rReg multiplies followed by an
// addI_rReg. The second multiply overwrites src2 with src2*src3, which is why
// src2 is declared killed in addition to the flags register.
// NOTE(review): addI_rReg is defined elsewhere in the file; it is assumed to
// compute dst += src2 - confirm.
11665 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11666 %{
11667   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11668   effect(KILL cr, KILL src2);
11669 
11670   expand %{ mulI_rReg(dst, src1, cr);
11671            mulI_rReg(src2, src3, cr);
11672            addI_rReg(dst, src2, cr); %}
11673 %}
11674 
// Two-operand long multiply, dst *= src, used when APX is disabled
// (predicate). Emits imulq reg, reg at cost 300; cr is declared killed.
11675 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11676 %{
11677   predicate(!UseAPX);
11678   match(Set dst (MulL dst src));
11679   effect(KILL cr);
11680 
11681   ins_cost(300);
11682   format %{ "imulq   $dst, $src\t# long" %}
11683   ins_encode %{
11684     __ imulq($dst$$Register, $src$$Register);
11685   %}
11686   ins_pipe(ialu_reg_reg_alu0);
11687 %}
11688 
// Three-operand ("ndd") long multiply, dst = src1 * src2, used when APX is
// enabled (predicate). Emits eimulq with a separate destination.
// NOTE(review): both Flag_ndd_demotable_opr1 and _opr2 are listed, presumably
// because multiplication is commutative so either source may coincide with
// dst for demotion - confirm against the flag definitions.
11689 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11690 %{
11691   predicate(UseAPX);
11692   match(Set dst (MulL src1 src2));
11693   effect(KILL cr);
11694   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11695 
11696   ins_cost(300);
11697   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11698   ins_encode %{
11699     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11700   %}
11701   ins_pipe(ialu_reg_reg_alu0);
11702 %}
11703 
// Long multiply by an immL32 immediate, dst = src * imm, using the
// three-operand imulq reg, reg, imm form. Has no UseAPX predicate, so it
// applies in both modes; cr is declared killed.
11704 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11705 %{
11706   match(Set dst (MulL src imm));
11707   effect(KILL cr);
11708 
11709   ins_cost(300);
11710   format %{ "imulq   $dst, $src, $imm\t# long" %}
11711   ins_encode %{
11712     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11713   %}
11714   ins_pipe(ialu_reg_reg_alu0);
11715 %}
11716 
// Two-operand long multiply with a folded load, dst *= LoadL(src), used when
// APX is disabled (predicate). Emits imulq reg, mem at cost 350.
11717 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11718 %{
11719   predicate(!UseAPX);
11720   match(Set dst (MulL dst (LoadL src)));
11721   effect(KILL cr);
11722 
11723   ins_cost(350);
11724   format %{ "imulq   $dst, $src\t# long" %}
11725   ins_encode %{
11726     __ imulq($dst$$Register, $src$$Address);
11727   %}
11728   ins_pipe(ialu_reg_mem_alu0);
11729 %}
11730 
// Three-operand ("ndd") long multiply with a folded load of the second
// factor: dst = src1 * LoadL(src2), used when APX is enabled (predicate).
// Emits eimulq reg, reg, mem at cost 350. The format annotation follows the
// "# long ndd" convention used by the other NDD long rules.
// NOTE(review): Flag_ndd_demotable_opr2 is listed although the second input
// is a memory operand; what demotion means for it is not visible here -
// confirm against the flag's definition.
11731 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11732 %{
11733   predicate(UseAPX);
11734   match(Set dst (MulL src1 (LoadL src2)));
11735   effect(KILL cr);
11736   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11737 
11738   ins_cost(350);
11739   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11740   ins_encode %{
11741     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11742   %}
11743   ins_pipe(ialu_reg_mem_alu0);
11744 %}
11745 
// Long multiply of a loaded value by an immL32 immediate,
// dst = LoadL(src) * imm, using the three-operand imulq reg, mem, imm form.
// Has no UseAPX predicate. Costed at 300, the same as the register form.
11746 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11747 %{
11748   match(Set dst (MulL (LoadL src) imm));
11749   effect(KILL cr);
11750 
11751   ins_cost(300);
11752   format %{ "imulq   $dst, $src, $imm\t# long" %}
11753   ins_encode %{
11754     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11755   %}
11756   ins_pipe(ialu_reg_mem_alu0);
11757 %}
11758 
// Signed high half of a 64x64-bit multiply (MulHiL). One factor is pinned to
// RAX and the result to RDX by the operand classes; the one-operand imulq
// produces RDX:RAX as the format string shows. RAX is both read and
// overwritten (USE_KILL) and the flags register is killed.
11759 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11760 %{
11761   match(Set dst (MulHiL src rax));
11762   effect(USE_KILL rax, KILL cr);
11763 
11764   ins_cost(300);
11765   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11766   ins_encode %{
11767     __ imulq($src$$Register);
11768   %}
11769   ins_pipe(ialu_reg_reg_alu0);
11770 %}
11771 
// Unsigned high half of a 64x64-bit multiply (UMulHiL). One factor is pinned
// to RAX and the result to RDX by the operand classes; the one-operand mulq
// produces RDX:RAX as the format string shows. RAX is both read and
// overwritten (USE_KILL) and the flags register is killed.
11772 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11773 %{
11774   match(Set dst (UMulHiL src rax));
11775   effect(USE_KILL rax, KILL cr);
11776 
11777   ins_cost(300);
11778   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11779   ins_encode %{
11780     __ mulq($src$$Register);
11781   %}
11782   ins_pipe(ialu_reg_reg_alu0);
11783 %}
11784 
// Signed int division, rax = rax / div. The dividend and quotient are pinned
// to RAX; RDX and the flags are declared killed; the divisor may be any
// register except RAX/RDX. The format string documents the intended sequence:
// when rax == 0x80000000 and div == -1 the idivl is skipped (rdx is zeroed and
// rax left unchanged); otherwise cdql + idivl run.
// NOTE(review): the actual bytes come from the cdql_enc encoding class, which
// is defined elsewhere - confirm it matches the format text.
11785 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11786                    rFlagsReg cr)
11787 %{
11788   match(Set rax (DivI rax div));
11789   effect(KILL rdx, KILL cr);
11790 
11791   ins_cost(30*100+10*100); // XXX
11792   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11793             "jne,s   normal\n\t"
11794             "xorl    rdx, rdx\n\t"
11795             "cmpl    $div, -1\n\t"
11796             "je,s    done\n"
11797     "normal: cdql\n\t"
11798             "idivl   $div\n"
11799     "done:"        %}
11800   ins_encode(cdql_enc(div));
11801   ins_pipe(ialu_reg_reg_alu0);
11802 %}
11803 
// Signed long division, rax = rax / div. The dividend and quotient are pinned
// to RAX; RDX and the flags are declared killed; the divisor may be any
// register except RAX/RDX. The format string documents the intended sequence:
// when rax == 0x8000000000000000 and div == -1 the idivq is skipped (rdx is
// zeroed and rax left unchanged); otherwise cdqq + idivq run.
// NOTE(review): the actual bytes come from the cdqq_enc encoding class, which
// is defined elsewhere - confirm it matches the format text.
11804 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11805                    rFlagsReg cr)
11806 %{
11807   match(Set rax (DivL rax div));
11808   effect(KILL rdx, KILL cr);
11809 
11810   ins_cost(30*100+10*100); // XXX
11811   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11812             "cmpq    rax, rdx\n\t"
11813             "jne,s   normal\n\t"
11814             "xorl    rdx, rdx\n\t"
11815             "cmpq    $div, -1\n\t"
11816             "je,s    done\n"
11817     "normal: cdqq\n\t"
11818             "idivq   $div\n"
11819     "done:"        %}
11820   ins_encode(cdqq_enc(div));
11821   ins_pipe(ialu_reg_reg_alu0);
11822 %}
11823 
// Unsigned int division, rax = rax / div (UDivI). The dividend and quotient
// are pinned to RAX; RDX and the flags are declared killed. The work is
// delegated to the udivI macro-assembler helper, which is handed rax, div and
// rdx.
// NOTE(review): the helper's exact instruction sequence is not visible here.
11824 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11825 %{
11826   match(Set rax (UDivI rax div));
11827   effect(KILL rdx, KILL cr);
11828 
11829   ins_cost(300);
11830   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11831   ins_encode %{
11832     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11833   %}
11834   ins_pipe(ialu_reg_reg_alu0);
11835 %}
11836 
// Unsigned long division, rax = rax / div (UDivL). The dividend and quotient
// are pinned to RAX; RDX and the flags are declared killed. The work is
// delegated to the udivL macro-assembler helper, which is handed rax, div and
// rdx.
// NOTE(review): the helper's exact instruction sequence is not visible here.
11837 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11838 %{
11839   match(Set rax (UDivL rax div));
11840   effect(KILL rdx, KILL cr);
11841 
11842   ins_cost(300);
11843   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11844   ins_encode %{
11845      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11846   %}
11847   ins_pipe(ialu_reg_reg_alu0);
11848 %}
11849 
11850 // Integer DIVMOD with Register, both quotient and mod results
// Combined signed int divide/remainder (DivModI). The match rule has no
// Set: both RAX and RDX appear as operands and only the flags are declared
// killed. It uses the same cdql_enc encoding and format text as the plain
// int divide rule, but is scheduled on pipe_slow.
// NOTE(review): the actual bytes come from the cdql_enc encoding class, which
// is defined elsewhere - confirm it matches the format text.
11851 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11852                              rFlagsReg cr)
11853 %{
11854   match(DivModI rax div);
11855   effect(KILL cr);
11856 
11857   ins_cost(30*100+10*100); // XXX
11858   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11859             "jne,s   normal\n\t"
11860             "xorl    rdx, rdx\n\t"
11861             "cmpl    $div, -1\n\t"
11862             "je,s    done\n"
11863     "normal: cdql\n\t"
11864             "idivl   $div\n"
11865     "done:"        %}
11866   ins_encode(cdql_enc(div));
11867   ins_pipe(pipe_slow);
11868 %}
11869 
11870 // Long DIVMOD with Register, both quotient and mod results
// Combined signed long divide/remainder (DivModL). The match rule has no
// Set: both RAX and RDX appear as operands and only the flags are declared
// killed. It uses the same cdqq_enc encoding and format text as the plain
// long divide rule, but is scheduled on pipe_slow.
// NOTE(review): the actual bytes come from the cdqq_enc encoding class, which
// is defined elsewhere - confirm it matches the format text.
11871 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11872                              rFlagsReg cr)
11873 %{
11874   match(DivModL rax div);
11875   effect(KILL cr);
11876 
11877   ins_cost(30*100+10*100); // XXX
11878   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11879             "cmpq    rax, rdx\n\t"
11880             "jne,s   normal\n\t"
11881             "xorl    rdx, rdx\n\t"
11882             "cmpq    $div, -1\n\t"
11883             "je,s    done\n"
11884     "normal: cdqq\n\t"
11885             "idivq   $div\n"
11886     "done:"        %}
11887   ins_encode(cdqq_enc(div));
11888   ins_pipe(pipe_slow);
11889 %}
11890 
11891 // Unsigned integer DIVMOD with Register, both quotient and mod results
// Combined unsigned int divide/remainder (UDivModI). Both RAX and RDX appear
// as operands and the match rule has no Set. An extra scratch register tmp
// (any register but RAX/RDX) is reserved as a TEMP and passed to the
// udivmodI macro-assembler helper together with rax, div and rdx.
// NOTE(review): the helper's exact instruction sequence is not visible here.
11892 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11893                               no_rax_rdx_RegI div, rFlagsReg cr)
11894 %{
11895   match(UDivModI rax div);
11896   effect(TEMP tmp, KILL cr);
11897 
11898   ins_cost(300);
11899   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11900             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11901           %}
11902   ins_encode %{
11903     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11904   %}
11905   ins_pipe(pipe_slow);
11906 %}
11907 
11908 // Unsigned long DIVMOD with Register, both quotient and mod results
// Combined unsigned long divide/remainder (UDivModL). Both RAX and RDX appear
// as operands and the match rule has no Set. An extra scratch register tmp
// (any register but RAX/RDX) is reserved as a TEMP and passed to the
// udivmodL macro-assembler helper together with rax, div and rdx.
// NOTE(review): the helper's exact instruction sequence is not visible here.
11909 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11910                               no_rax_rdx_RegL div, rFlagsReg cr)
11911 %{
11912   match(UDivModL rax div);
11913   effect(TEMP tmp, KILL cr);
11914 
11915   ins_cost(300);
11916   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11917             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11918           %}
11919   ins_encode %{
11920     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11921   %}
11922   ins_pipe(pipe_slow);
11923 %}
11924 
// Signed int remainder, rdx = rax % div. The dividend is pinned to RAX and
// the result to RDX; RAX and the flags are declared killed. It uses the same
// cdql_enc encoding as the int divide rule. In the special-case path shown in
// the format string (rax == 0x80000000, div == -1) rdx is zeroed and the
// idivl is skipped.
// NOTE(review): the actual bytes come from the cdql_enc encoding class, which
// is defined elsewhere - confirm it matches the format text.
11925 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11926                    rFlagsReg cr)
11927 %{
11928   match(Set rdx (ModI rax div));
11929   effect(KILL rax, KILL cr);
11930 
11931   ins_cost(300); // XXX
11932   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11933             "jne,s   normal\n\t"
11934             "xorl    rdx, rdx\n\t"
11935             "cmpl    $div, -1\n\t"
11936             "je,s    done\n"
11937     "normal: cdql\n\t"
11938             "idivl   $div\n"
11939     "done:"        %}
11940   ins_encode(cdql_enc(div));
11941   ins_pipe(ialu_reg_reg_alu0);
11942 %}
11943 
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  // Signed long remainder. The result is defined in rdx; rax and the flags
  // register are declared killed. The code itself comes from the cdqq_enc
  // encoding class, and the format below lists the expected sequence.
  match(Set rdx (ModL rax div));
  ins_cost(300); // XXX
  effect(KILL rax, KILL cr);

  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11963 
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  // Unsigned int remainder: result defined in rdx, rax and flags killed.
  match(Set rdx (UModI rax div));
  ins_cost(300);
  effect(KILL rax, KILL cr);

  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    const Register dividend  = $rax$$Register;
    const Register divisor   = $div$$Register;
    const Register remainder = $rdx$$Register;
    __ umodI(dividend, divisor, remainder);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11976 
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  // Unsigned long remainder: result defined in rdx, rax and flags killed.
  match(Set rdx (UModL rax div));
  ins_cost(300);
  effect(KILL rax, KILL cr);

  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    const Register dividend  = $rax$$Register;
    const Register divisor   = $div$$Register;
    const Register remainder = $rdx$$Register;
    __ umodL(dividend, divisor, remainder);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11989 
11990 // Integer Shift Instructions
11991 // Shift Left by one, two, three
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
%{
  // Non-APX form: dst is both source and destination; flags are killed.
  match(Set dst (LShiftI dst shift));
  predicate(!UseAPX);
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ sall(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12004 
12005 // Shift Left by one, two, three
instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
%{
  // APX form with separate destination and source registers; flags killed.
  match(Set dst (LShiftI src shift));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    const int count = $shift$$constant;
    __ esall(dst_reg, src_reg, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12019 
12020 // Shift Left by 8-bit immediate
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  // Non-APX form: dst is shifted in place by the constant; flags killed.
  match(Set dst (LShiftI dst shift));
  predicate(!UseAPX);
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ sall(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12033 
12034 // Shift Left by 8-bit immediate
instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  // APX form with separate destination and source registers; flags killed.
  match(Set dst (LShiftI src shift));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    const int count = $shift$$constant;
    __ esall(dst_reg, src_reg, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12048 
instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  // APX form that folds the int load of src into the shift; flags killed.
  match(Set dst (LShiftI (LoadI src) shift));
  predicate(UseAPX);
  effect(KILL cr);

  format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_mem = $src$$Address;
    const int count = $shift$$constant;
    __ esall(dst_reg, src_mem, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12061 
12062 // Shift Left by 8-bit immediate
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  // Load, shift and store on the same int memory location; flags killed.
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    const int count = $shift$$constant;
    __ sall(dst_mem, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12074 
12075 // Shift Left by variable
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. The count operand is constrained to
  // the rcx register class, so the encoding names only dst.
  match(Set dst (LShiftI dst shift));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ sall(dst_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12088 
12089 // Shift Left by variable
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. Shifts an int memory location in
  // place; the count operand is constrained to the rcx register class.
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    __ sall(dst_mem);
  %}
  ins_pipe(ialu_mem_reg);
%}
12102 
instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  // BMI2 form: three register operands and no flags effect declared.
  match(Set dst (LShiftI src shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    const Register src_reg   = $src$$Register;
    const Register count_reg = $shift$$Register;
    __ shlxl(dst_reg, src_reg, count_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12114 
instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  // BMI2 form that folds the int load of src; no flags effect declared.
  match(Set dst (LShiftI (LoadI src) shift));
  predicate(VM_Version::supports_bmi2());
  ins_cost(175);

  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    Address src_mem          = $src$$Address;
    const Register count_reg = $shift$$Register;
    __ shlxl(dst_reg, src_mem, count_reg);
  %}
  ins_pipe(ialu_reg_mem);
%}
12126 
12127 // Arithmetic Shift Right by 8-bit immediate
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  // Non-APX form: dst is shifted in place by the constant; flags killed.
  match(Set dst (RShiftI dst shift));
  predicate(!UseAPX);
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ sarl(dst_reg, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12140 
12141 // Arithmetic Shift Right by 8-bit immediate
instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  // APX form with separate destination and source registers; flags killed.
  match(Set dst (RShiftI src shift));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    const int count = $shift$$constant;
    __ esarl(dst_reg, src_reg, count, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12155 
instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  // APX form that folds the int load of src into the shift; flags killed.
  match(Set dst (RShiftI (LoadI src) shift));
  predicate(UseAPX);
  effect(KILL cr);

  format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_mem = $src$$Address;
    const int count = $shift$$constant;
    __ esarl(dst_reg, src_mem, count, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12168 
12169 // Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  // Load, shift and store on the same int memory location; flags killed.
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    const int count = $shift$$constant;
    __ sarl(dst_mem, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12181 
12182 // Arithmetic Shift Right by variable
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. The count operand is constrained to
  // the rcx register class, so the encoding names only dst.
  match(Set dst (RShiftI dst shift));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ sarl(dst_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12195 
12196 // Arithmetic Shift Right by variable
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. Shifts an int memory location in
  // place; the count operand is constrained to the rcx register class.
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    __ sarl(dst_mem);
  %}
  ins_pipe(ialu_mem_reg);
%}
12209 
instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  // BMI2 form: three register operands and no flags effect declared.
  match(Set dst (RShiftI src shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    const Register src_reg   = $src$$Register;
    const Register count_reg = $shift$$Register;
    __ sarxl(dst_reg, src_reg, count_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12221 
instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  // BMI2 form that folds the int load of src; no flags effect declared.
  match(Set dst (RShiftI (LoadI src) shift));
  predicate(VM_Version::supports_bmi2());
  ins_cost(175);

  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    Address src_mem          = $src$$Address;
    const Register count_reg = $shift$$Register;
    __ sarxl(dst_reg, src_mem, count_reg);
  %}
  ins_pipe(ialu_reg_mem);
%}
12233 
12234 // Logical Shift Right by 8-bit immediate
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  // Non-APX form: dst is shifted in place by the constant; flags killed.
  match(Set dst (URShiftI dst shift));
  predicate(!UseAPX);
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ shrl(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12247 
12248 // Logical Shift Right by 8-bit immediate
instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  // APX form with separate destination and source registers; flags killed.
  match(Set dst (URShiftI src shift));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    const int count = $shift$$constant;
    __ eshrl(dst_reg, src_reg, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12262 
instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  // APX form that folds the int load of src into the shift; flags killed.
  match(Set dst (URShiftI (LoadI src) shift));
  predicate(UseAPX);
  effect(KILL cr);

  format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_mem = $src$$Address;
    const int count = $shift$$constant;
    __ eshrl(dst_reg, src_mem, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12275 
12276 // Logical Shift Right by 8-bit immediate
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  // Load, shift and store on the same int memory location; flags killed.
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    const int count = $shift$$constant;
    __ shrl(dst_mem, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12288 
12289 // Logical Shift Right by variable
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. The count operand is constrained to
  // the rcx register class, so the encoding names only dst.
  match(Set dst (URShiftI dst shift));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ shrl(dst_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12302 
12303 // Logical Shift Right by variable
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. Shifts an int memory location in
  // place; the count operand is constrained to the rcx register class.
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    __ shrl(dst_mem);
  %}
  ins_pipe(ialu_mem_reg);
%}
12316 
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  // BMI2 form: three register operands and no flags effect declared.
  match(Set dst (URShiftI src shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    const Register src_reg   = $src$$Register;
    const Register count_reg = $shift$$Register;
    __ shrxl(dst_reg, src_reg, count_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12328 
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  // BMI2 form that folds the int load of src; no flags effect declared.
  match(Set dst (URShiftI (LoadI src) shift));
  predicate(VM_Version::supports_bmi2());
  ins_cost(175);

  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    Address src_mem          = $src$$Address;
    const Register count_reg = $shift$$Register;
    __ shrxl(dst_reg, src_mem, count_reg);
  %}
  ins_pipe(ialu_reg_mem);
%}
12340 
12341 // Long Shift Instructions
12342 // Shift Left by one, two, three
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  // Non-APX form: the long in dst is shifted in place; flags killed.
  match(Set dst (LShiftL dst shift));
  predicate(!UseAPX);
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ salq(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12355 
12356 // Shift Left by one, two, three
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
%{
  // APX form with separate destination and source registers; flags killed.
  match(Set dst (LShiftL src shift));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    const int count = $shift$$constant;
    __ esalq(dst_reg, src_reg, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12370 
12371 // Shift Left by 8-bit immediate
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  // Non-APX form: the long in dst is shifted in place; flags killed.
  match(Set dst (LShiftL dst shift));
  predicate(!UseAPX);
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ salq(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12384 
12385 // Shift Left by 8-bit immediate
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  // APX form with separate destination and source registers; flags killed.
  match(Set dst (LShiftL src shift));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    const int count = $shift$$constant;
    __ esalq(dst_reg, src_reg, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12399 
instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  // APX form that folds the long load of src into the shift; flags killed.
  match(Set dst (LShiftL (LoadL src) shift));
  predicate(UseAPX);
  effect(KILL cr);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_mem = $src$$Address;
    const int count = $shift$$constant;
    __ esalq(dst_reg, src_mem, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12412 
12413 // Shift Left by 8-bit immediate
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  // Load, shift and store on the same long memory location; flags killed.
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    const int count = $shift$$constant;
    __ salq(dst_mem, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12425 
12426 // Shift Left by variable
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. The count operand is constrained to
  // the rcx register class, so the encoding names only dst.
  match(Set dst (LShiftL dst shift));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ salq(dst_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12439 
12440 // Shift Left by variable
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. Shifts a long memory location in
  // place; the count operand is constrained to the rcx register class.
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    __ salq(dst_mem);
  %}
  ins_pipe(ialu_mem_reg);
%}
12453 
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  // BMI2 form: three register operands and no flags effect declared.
  match(Set dst (LShiftL src shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    const Register src_reg   = $src$$Register;
    const Register count_reg = $shift$$Register;
    __ shlxq(dst_reg, src_reg, count_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12465 
instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  // BMI2 form that folds the long load of src; no flags effect declared.
  match(Set dst (LShiftL (LoadL src) shift));
  predicate(VM_Version::supports_bmi2());
  ins_cost(175);

  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    Address src_mem          = $src$$Address;
    const Register count_reg = $shift$$Register;
    __ shlxq(dst_reg, src_mem, count_reg);
  %}
  ins_pipe(ialu_reg_mem);
%}
12477 
// Arithmetic Shift Right by immediate (shift count masked to 6 bits)
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  // Non-APX form: the long in dst is shifted in place; flags killed.
  match(Set dst (RShiftL dst shift));
  predicate(!UseAPX);
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    // Only the low six bits of the constant are passed as the count.
    const unsigned char count = (unsigned char)($shift$$constant & 0x3F);
    const Register dst_reg = $dst$$Register;
    __ sarq(dst_reg, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12491 
// Arithmetic Shift Right by immediate (shift count masked to 6 bits)
instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
%{
  // APX form with separate destination and source registers; flags killed.
  match(Set dst (RShiftL src shift));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    // Only the low six bits of the constant are passed as the count.
    const unsigned char count = (unsigned char)($shift$$constant & 0x3F);
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ esarq(dst_reg, src_reg, count, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12506 
instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
%{
  // APX form that folds the long load of src into the shift; flags killed.
  match(Set dst (RShiftL (LoadL src) shift));
  predicate(UseAPX);
  effect(KILL cr);

  format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    // Only the low six bits of the constant are passed as the count.
    const unsigned char count = (unsigned char)($shift$$constant & 0x3F);
    const Register dst_reg = $dst$$Register;
    Address src_mem = $src$$Address;
    __ esarq(dst_reg, src_mem, count, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12519 
// Arithmetic Shift Right by immediate (shift count masked to 6 bits)
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  // Load, shift and store on the same long memory location; flags killed.
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    // Only the low six bits of the constant are passed as the count.
    const unsigned char count = (unsigned char)($shift$$constant & 0x3F);
    Address dst_mem = $dst$$Address;
    __ sarq(dst_mem, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12532 
12533 // Arithmetic Shift Right by variable
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. The count operand is constrained to
  // the rcx register class, so the encoding names only dst.
  match(Set dst (RShiftL dst shift));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ sarq(dst_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12546 
12547 // Arithmetic Shift Right by variable
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. Shifts a long memory location in
  // place; the count operand is constrained to the rcx register class.
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    __ sarq(dst_mem);
  %}
  ins_pipe(ialu_mem_reg);
%}
12560 
instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  // BMI2 form: three register operands and no flags effect declared.
  match(Set dst (RShiftL src shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    const Register src_reg   = $src$$Register;
    const Register count_reg = $shift$$Register;
    __ sarxq(dst_reg, src_reg, count_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12572 
instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  // BMI2 form that folds the long load of src; no flags effect declared.
  match(Set dst (RShiftL (LoadL src) shift));
  predicate(VM_Version::supports_bmi2());
  ins_cost(175);

  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    Address src_mem          = $src$$Address;
    const Register count_reg = $shift$$Register;
    __ sarxq(dst_reg, src_mem, count_reg);
  %}
  ins_pipe(ialu_reg_mem);
%}
12584 
12585 // Logical Shift Right by 8-bit immediate
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  // Non-APX form: the long in dst is shifted in place; flags killed.
  match(Set dst (URShiftL dst shift));
  predicate(!UseAPX);
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ shrq(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12598 
12599 // Logical Shift Right by 8-bit immediate
instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  // APX form with separate destination and source registers; flags killed.
  match(Set dst (URShiftL src shift));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    const int count = $shift$$constant;
    __ eshrq(dst_reg, src_reg, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12613 
instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  // APX form that folds the long load of src into the shift; flags killed.
  match(Set dst (URShiftL (LoadL src) shift));
  predicate(UseAPX);
  effect(KILL cr);

  format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_mem = $src$$Address;
    const int count = $shift$$constant;
    __ eshrq(dst_reg, src_mem, count, false);
  %}
  ins_pipe(ialu_reg);
%}
12626 
12627 // Logical Shift Right by 8-bit immediate
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  // Load, shift and store on the same long memory location; flags killed.
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    const int count = $shift$$constant;
    __ shrq(dst_mem, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12639 
12640 // Logical Shift Right by variable
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. The count operand is constrained to
  // the rcx register class, so the encoding names only dst.
  match(Set dst (URShiftL dst shift));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ shrq(dst_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12653 
12654 // Logical Shift Right by variable
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Used when BMI2 is not available. Shifts a long memory location in
  // place; the count operand is constrained to the rcx register class.
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    __ shrq(dst_mem);
  %}
  ins_pipe(ialu_mem_reg);
%}
12667 
instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  // BMI2 form: three register operands and no flags effect declared.
  match(Set dst (URShiftL src shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    const Register src_reg   = $src$$Register;
    const Register count_reg = $shift$$Register;
    __ shrxq(dst_reg, src_reg, count_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12679 
instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  // BMI2 form that folds the long load of src; no flags effect declared.
  match(Set dst (URShiftL (LoadL src) shift));
  predicate(VM_Version::supports_bmi2());
  ins_cost(175);

  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg   = $dst$$Register;
    Address src_mem          = $src$$Address;
    const Register count_reg = $shift$$Register;
    __ shrxq(dst_reg, src_mem, count_reg);
  %}
  ins_pipe(ialu_reg_mem);
%}
12691 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  // Matches a left shift by 24 whose result is arithmetically shifted
  // right by 24, and emits a single movsbl instead of the two shifts.
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ movsbl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12704 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  // Matches a left shift by 16 whose result is arithmetically shifted
  // right by 16, and emits a single movswl instead of the two shifts.
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ movswl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12717 
12718 // ROL/ROR instructions
12719 
12720 // Rotate left by constant.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  // Selected for int rotates when BMI2 is not available; dst is rotated
  // in place and the flags register is declared killed.
  match(Set dst (RotateLeft dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ roll(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12732 
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  // BMI2, non-APX int rotate left by constant; no flags effect declared.
  match(Set dst (RotateLeft src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    // The left rotation is emitted as a right rotation by 32 minus the
    // count taken modulo 32.
    const int ror_count = 32 - ($shift$$constant & 31);
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ rorxl(dst_reg, src_reg, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12744 
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  // BMI2 int rotate left by constant with the load of src folded in.
  match(Set dst (RotateLeft (LoadI src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  ins_cost(175);

  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    // The left rotation is emitted as a right rotation by 32 minus the
    // count taken modulo 32.
    const int ror_count = 32 - ($shift$$constant & 31);
    const Register dst_reg = $dst$$Register;
    Address src_mem = $src$$Address;
    __ rorxl(dst_reg, src_mem, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12757 
12758 // Rotate Left by variable
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Non-APX int rotate left by a variable count. The count operand is
  // constrained to the rcx register class; the encoding names only dst.
  match(Set dst (RotateLeft dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ roll(dst_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12770 
12771 // Rotate Left by variable
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  // APX int rotate left by a variable count with separate destination and
  // source registers. The count operand is constrained to the rcx class.
  match(Set dst (RotateLeft src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ eroll(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12785 
12786 // Rotate Right by constant.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  // Selected for int rotates when BMI2 is not available; dst is rotated
  // in place and the flags register is declared killed.
  match(Set dst (RotateRight dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ rorl(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12798 
12799 // Rotate Right by constant.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  // BMI2, non-APX int rotate right by constant; no flags effect declared.
  match(Set dst (RotateRight src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    const int count = $shift$$constant;
    __ rorxl(dst_reg, src_reg, count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12810 
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  // BMI2 int rotate right by constant with the load of src folded in.
  match(Set dst (RotateRight (LoadI src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  ins_cost(175);

  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_mem = $src$$Address;
    const int count = $shift$$constant;
    __ rorxl(dst_reg, src_mem, count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12822 
12823 // Rotate Right by variable
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Non-APX int rotate right by a variable count. The count operand is
  // constrained to the rcx register class; the encoding names only dst.
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ rorl(dst_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12835 
12836 // Rotate Right by variable
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  // APX int rotate right by a variable count with separate destination and
  // source registers. The count operand is constrained to the rcx class.
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ erorl(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12850 
12851 // Rotate Left by constant.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  // Selected for long rotates when BMI2 is not available; dst is rotated
  // in place and the flags register is declared killed.
  match(Set dst (RotateLeft dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ rolq(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12863 
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  // BMI2, non-APX long rotate left by constant; no flags effect declared.
  match(Set dst (RotateLeft src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    // The left rotation is emitted as a right rotation by 64 minus the
    // count taken modulo 64.
    const int ror_count = 64 - ($shift$$constant & 63);
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ rorxq(dst_reg, src_reg, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12875 
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  // BMI2 long rotate left by constant with the load of src folded in.
  match(Set dst (RotateLeft (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  ins_cost(175);

  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    // The left rotation is emitted as a right rotation by 64 minus the
    // count taken modulo 64.
    const int ror_count = 64 - ($shift$$constant & 63);
    const Register dst_reg = $dst$$Register;
    Address src_mem = $src$$Address;
    __ rorxq(dst_reg, src_mem, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12888 
12889 // Rotate Left by variable
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Non-APX long rotate left by a variable count. The count operand is
  // constrained to the rcx register class; the encoding names only dst.
  match(Set dst (RotateLeft dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ rolq(dst_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
12902 
12903 // Rotate Left by variable
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  // APX long rotate left by a variable count with separate destination and
  // source registers. The count operand is constrained to the rcx class.
  match(Set dst (RotateLeft src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ erolq(dst_reg, src_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12917 
12918 // Rotate Right by constant.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  // Selected for long rotates when BMI2 is not available; dst is rotated
  // in place and the flags register is declared killed.
  match(Set dst (RotateRight dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const int count = $shift$$constant;
    __ rorq(dst_reg, count);
  %}
  ins_pipe(ialu_reg);
%}
12930 
// Rotate Right by constant (long), three-operand rorxq form selected when
// BMI2 is available. No flags effect is declared for this rule.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  match(Set dst (RotateRight src shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ rorxq(result, value, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12942 
// Rotate Right by constant, long operand loaded from memory (requires BMI2).
// The load is folded into the rorxq memory form.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  match(Set dst (RotateRight (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  ins_cost(175);
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ rorxq(result, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12954 
// Rotate Right by a variable count (long, two-operand form, APX disabled).
// The count operand is constrained to the rcx_RegI class and the flags
// register is killed.
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    // Only the destination is passed; the count operand is not an argument.
    const Register value = $dst$$Register;
    __ rorq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12967 
// Rotate Right by a variable count (long, APX NDD form with a separate
// destination). The count operand is constrained to the rcx_RegI class and
// the flags register is killed.
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ erorq(result, value, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12982 
12983 //----------------------------- CompressBits/ExpandBits ------------------------
12984 
// CompressBits on long values with the mask in a register, emitted as pextq.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  match(Set dst (CompressBits src mask));
  predicate(n->bottom_type()->isa_long());
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    const Register selector = $mask$$Register;
    __ pextq(result, value, selector);
  %}
  ins_pipe( pipe_slow );
%}
12994 
// ExpandBits on long values with the mask in a register, emitted as pdepq.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  match(Set dst (ExpandBits src mask));
  predicate(n->bottom_type()->isa_long());
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    const Register selector = $mask$$Register;
    __ pdepq(result, value, selector);
  %}
  ins_pipe( pipe_slow );
%}
13004 
// CompressBits on long values with the mask loaded from memory; the LoadL of
// the mask is folded into the pextq memory form.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  match(Set dst (CompressBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ pextq(result, value, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
13014 
// ExpandBits on long values with the mask loaded from memory; the LoadL of
// the mask is folded into the pdepq memory form.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  match(Set dst (ExpandBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ pdepq(result, value, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
13024 
13025 
13026 // Logical Instructions
13027 
13028 // Integer Logical Instructions
13029 
13030 // And Instructions
// Int AND of a register with a register: dst = dst & src.
// Two-operand form selected when APX is disabled; kills the flags register.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ andl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13045 
// Int AND of two registers into a separate destination, using the APX
// New Data Destination (NDD) form: dst = src1 & src2. Kills the flags register.
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ eandl(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13061 
// Int AND with the constant 255, emitted as a zero-extending byte move
// (movzbl). The rule declares no flags effect.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ movzbl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13073 
// Int AND with the constant 255 followed by ConvI2L, emitted as a single
// movzbl into the long destination.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ movzbl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13085 
// Int AND with the constant 65535, emitted as a zero-extending word move
// (movzwl). The rule declares no flags effect.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ movzwl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13097 
// Int AND with the constant 65535 followed by ConvI2L, emitted as a single
// movzwl into the long destination.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ movzwl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13109 
// ConvI2L of an int AND with an immI_Pow2M1 mask (requires BMI2).
// The bit count log2(mask + 1) is materialized in a temporary register and
// bzhiq is applied with it. Needs a TEMP register and kills the flags register.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI src mask)));
  predicate(VM_Version::supports_bmi2());
  effect(TEMP tmp, KILL cr);
  ins_cost(125);
  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
  ins_encode %{
    const Register index_reg = $tmp$$Register;
    // Number of low bits kept by the mask.
    const int kept_bits = exact_log2($mask$$constant + 1);
    __ movl(index_reg, kept_bits);
    __ bzhiq($dst$$Register, $src$$Register, index_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13124 
// Int AND of a register with an immediate: dst = dst & src.
// Two-operand form selected when APX is disabled; kills the flags register.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ andl(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13139 
// Int AND of a register with an immediate into a separate destination
// (APX NDD form): dst = src1 & src2. Kills the flags register.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eandl(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13153 
// Int AND of a memory operand with an immediate into a register destination
// (APX NDD form): dst = [src1] & src2. Kills the flags register.
instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (AndI (LoadI src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eandl(result, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13167 
// Int AND of a register with a memory operand: dst = dst & [src].
// Two-operand form selected when APX is disabled; kills the flags register.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ andl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13183 
// Int AND of a register with a memory operand into a separate destination
// (APX NDD form): dst = src1 & [src2]. Kills the flags register.
instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eandl(result, lhs, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13198 
// Byte AND of memory with a register: load, AND and store on the same
// memory operand are folded into one andb. Kills the flags register.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andb    $dst, $src\t# byte" %}
  ins_encode %{
    const Register src_reg = $src$$Register;
    __ andb($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13213 
// Int AND of memory with a register: load, AND and store on the same
// memory operand are folded into one andl. Kills the flags register.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register src_reg = $src$$Register;
    __ andl($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13227 
// Int AND of memory with an immediate: load, AND and store on the same
// memory operand are folded into one andl. Kills the flags register.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    // Memory destination and immediate source are passed straight through.
    __ andl($dst$$Address,
            $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13242 
13243 // BMI1 instructions
// Int and-not with a memory operand: dst = (src1 ^ -1) & [src2], emitted as
// andnl. Requires BMI1 and AVX; kills the flags register.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    const Register result = $dst$$Register;
    const Register inverted = $src1$$Register;
    __ andnl(result, inverted, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13258 
// Int and-not of two registers: dst = (src1 ^ -1) & src2, emitted as andnl.
// Requires BMI1 and AVX; kills the flags register.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    const Register result = $dst$$Register;
    const Register inverted = $src1$$Register;
    const Register other = $src2$$Register;
    __ andnl(result, inverted, other);
  %}
  ins_pipe(ialu_reg);
%}
13272 
// Int pattern (0 - src) & src on a register, emitted as blsil.
// Requires BMI1 and AVX; kills the flags register.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ blsil(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13286 
// Int pattern (0 - [src]) & [src] on a memory operand, emitted as blsil with
// the load folded in. Requires BMI1 and AVX; kills the flags register.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    const Register result = $dst$$Register;
    __ blsil(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13301 
// Int pattern ([src] + -1) ^ [src] on a memory operand, emitted as blsmskl
// with the load folded in. Requires BMI1 and AVX; kills the flags register.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    const Register result = $dst$$Register;
    __ blsmskl(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13317 
// Int pattern (src + -1) ^ src on a register, emitted as blsmskl.
// Requires BMI1 and AVX; kills the flags register.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ blsmskl(result, value);
  %}

  ins_pipe(ialu_reg);
%}
13333 
// Int pattern (src + -1) & src on a register, emitted as blsrl.
// Requires BMI1 and AVX; kills the flags register.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // This form has no memory operand, so it uses the register-only pipeline
  // class, the same one used by blsiI_rReg_rReg and blsmskI_rReg_rReg.
  ins_pipe(ialu_reg);
%}
13349 
// Int pattern ([src] + -1) & [src] on a memory operand, emitted as blsrl with
// the load folded in. Requires BMI1 and AVX; kills the flags register.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // This form reads its source from memory, so it uses the register/memory
  // pipeline class, the same one used by blsiI_rReg_mem and blsmskI_rReg_mem.
  ins_pipe(ialu_reg_mem);
%}
13366 
13367 // Or Instructions
// Int OR of a register with a register: dst = dst | src.
// Two-operand form selected when APX is disabled; kills the flags register.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ orl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13382 
// Int OR of two registers into a separate destination, using the APX
// New Data Destination (NDD) form: dst = src1 | src2. Kills the flags register.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ eorl(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13397 
// Int OR of a register with an immediate: dst = dst | src.
// Two-operand form selected when APX is disabled; kills the flags register.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ orl(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13412 
// Int OR of a register with an immediate into a separate destination
// (APX NDD form): dst = src1 | src2. Kills the flags register.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eorl(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13426 
// Int OR with the immediate as the first input and the register as the
// second (APX NDD form): dst = src1 | src2. The encoding swaps the inputs so
// that the register is passed first and the immediate second.
// Kills the flags register.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  // The only register source is the second input operand (src2); src1 is an
  // immediate, so the demotable marker names operand 2.
  // NOTE(review): assumes the opr1/opr2 suffix is the input operand position,
  // as in the sibling NDD rules - confirm against the PD flag handling.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13440 
// Int OR of a memory operand with an immediate into a register destination
// (APX NDD form): dst = [src1] | src2. Kills the flags register.
instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (OrI (LoadI src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eorl(result, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13454 
// Int OR of a register with a memory operand: dst = dst | [src].
// Two-operand form selected when APX is disabled; kills the flags register.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ orl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13470 
// Int OR of a register with a memory operand into a separate destination
// (APX NDD form): dst = src1 | [src2]. Kills the flags register.
instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eorl(result, lhs, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13485 
// Byte OR of memory with a register: load, OR and store on the same
// memory operand are folded into one orb. Kills the flags register.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    const Register src_reg = $src$$Register;
    __ orb($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13500 
// Int OR of memory with a register: load, OR and store on the same
// memory operand are folded into one orl. Kills the flags register.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register src_reg = $src$$Register;
    __ orl($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13514 
// Int OR of memory with an immediate: load, OR and store on the same
// memory operand are folded into one orl. Kills the flags register.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    // Memory destination and immediate source are passed straight through.
    __ orl($dst$$Address,
           $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13529 
13530 // Xor Instructions
// Int XOR of a register with a register: dst = dst ^ src.
// Two-operand form selected when APX is disabled; kills the flags register.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ xorl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13545 
// Int XOR of two registers into a separate destination, using the APX
// New Data Destination (NDD) form: dst = src1 ^ src2. Kills the flags register.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ exorl(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13560 
// Int XOR of a register with the constant -1, emitted as notl on dst.
// Selected when APX is disabled; the rule declares no flags effect.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl    $dst" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ notl(value);
  %}
  ins_pipe(ialu_reg);
%}
13573 
// Int XOR of a register with the constant -1 into a separate destination
// (APX NDD form), emitted as enotl. The rule declares no flags effect.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl    $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    __ enotl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13586 
// Int XOR of a register with an immediate other than -1: dst = dst ^ src.
// Two-operand form selected when APX is disabled; kills the flags register.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  // The constant -1 is rejected here so that xorI_rReg_im1 is chosen for it
  // regardless of the relative costs of the two rules.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ xorl(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13602 
// Int XOR of a register with an immediate other than -1 into a separate
// destination (APX NDD form): dst = src1 ^ src2. Kills the flags register.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  // The constant -1 is rejected here so that xorI_rReg_im1_ndd is chosen for
  // it regardless of the relative costs of the two rules.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ exorl(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13617 
// Int XOR of a memory operand with an immediate into a register destination
// (APX NDD form): dst = [src1] ^ src2. Kills the flags register.
instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (XorI (LoadI src1) src2));
  predicate(UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ exorl(result, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13633 
// Int XOR of a register with a memory operand: dst = dst ^ [src].
// Two-operand form selected when APX is disabled; kills the flags register.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ xorl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13649 
// Int XOR of a register with a memory operand into a separate destination
// (APX NDD form): dst = src1 ^ [src2]. Kills the flags register.
instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ exorl(result, lhs, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13664 
// Byte XOR of memory with a register: load, XOR and store on the same
// memory operand are folded into one xorb. Kills the flags register.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    const Register src_reg = $src$$Register;
    __ xorb($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13679 
// Int XOR of memory with a register: load, XOR and store on the same
// memory operand are folded into one xorl. Kills the flags register.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    const Register src_reg = $src$$Register;
    __ xorl($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13693 
// Int XOR of memory with an immediate: load, XOR and store on the same
// memory operand are folded into one xorl. Kills the flags register.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    // Memory destination and immediate source are passed straight through.
    __ xorl($dst$$Address,
            $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13708 
13709 
13710 // Long Logical Instructions
13711 
13712 // And Instructions
// Long AND of a register with a register: dst = dst & src.
// Two-operand form selected when APX is disabled; kills the flags register.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ andq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13727 
// Long AND of two registers into a separate destination, using the APX
// New Data Destination (NDD) form: dst = src1 & src2. Kills the flags register.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ eandq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13743 
// Long AND with the constant 255, emitted as a zero-extending byte move.
// The rule declares no flags effect.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    // The 32-bit movzbl is sufficient: it clears the upper 32 bits of the
    // destination, so REX.W is not required.
    __ movzbl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13756 
// Long AND with the constant 65535, emitted as a zero-extending word move.
// The rule declares no flags effect.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value = $src$$Register;
    // The 32-bit movzwl is sufficient: it clears the upper 32 bits of the
    // destination, so REX.W is not required.
    __ movzwl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13769 
// Long AND of a register with an immL32 immediate: dst = dst & src.
// Two-operand form selected when APX is disabled; kills the flags register.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ andq(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13784 
13785 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13786 %{
13787   predicate(UseAPX);
13788   match(Set dst (AndL src1 src2));
13789   effect(KILL cr);
13790   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13791 
13792   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13793   ins_encode %{
13794     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13795   %}
13796   ins_pipe(ialu_reg);
13797 %}
13798 
13799 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13800 %{
13801   predicate(UseAPX);
13802   match(Set dst (AndL (LoadL src1) src2));
13803   effect(KILL cr);
13804   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13805 
13806   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13807   ins_encode %{
13808     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13809   %}
13810   ins_pipe(ialu_reg);
13811 %}
13812 
13813 // And Register with Memory
13814 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13815 %{
13816   predicate(!UseAPX);
13817   match(Set dst (AndL dst (LoadL src)));
13818   effect(KILL cr);
13819   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13820 
13821   ins_cost(150);
13822   format %{ "andq    $dst, $src\t# long" %}
13823   ins_encode %{
13824     __ andq($dst$$Register, $src$$Address);
13825   %}
13826   ins_pipe(ialu_reg_mem);
13827 %}
13828 
13829 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13830 %{
13831   predicate(UseAPX);
13832   match(Set dst (AndL src1 (LoadL src2)));
13833   effect(KILL cr);
13834   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13835 
13836   ins_cost(150);
13837   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13838   ins_encode %{
13839     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13840   %}
13841   ins_pipe(ialu_reg_mem);
13842 %}
13843 
13844 // And Memory with Register
13845 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13846 %{
13847   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13848   effect(KILL cr);
13849   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13850 
13851   ins_cost(150);
13852   format %{ "andq    $dst, $src\t# long" %}
13853   ins_encode %{
13854     __ andq($dst$$Address, $src$$Register);
13855   %}
13856   ins_pipe(ialu_mem_reg);
13857 %}
13858 
13859 // And Memory with Immediate
13860 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13861 %{
13862   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13863   effect(KILL cr);
13864   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13865 
13866   ins_cost(125);
13867   format %{ "andq    $dst, $src\t# long" %}
13868   ins_encode %{
13869     __ andq($dst$$Address, $src$$constant);
13870   %}
13871   ins_pipe(ialu_mem_imm);
13872 %}
13873 
13874 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13875 %{
13876   // con should be a pure 64-bit immediate given that not(con) is a power of 2
13877   // because AND/OR works well enough for 8/32-bit values.
13878   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13879 
13880   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13881   effect(KILL cr);
13882 
13883   ins_cost(125);
13884   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13885   ins_encode %{
13886     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13887   %}
13888   ins_pipe(ialu_mem_imm);
13889 %}
13890 
13891 // BMI1 instructions
13892 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13893   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13894   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13895   effect(KILL cr);
13896   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13897 
13898   ins_cost(125);
13899   format %{ "andnq  $dst, $src1, $src2" %}
13900 
13901   ins_encode %{
13902     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13903   %}
13904   ins_pipe(ialu_reg_mem);
13905 %}
13906 
13907 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13908   match(Set dst (AndL (XorL src1 minus_1) src2));
13909   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13910   effect(KILL cr);
13911   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13912 
13913   format %{ "andnq  $dst, $src1, $src2" %}
13914 
13915   ins_encode %{
13916   __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13917   %}
13918   ins_pipe(ialu_reg_mem);
13919 %}
13920 
13921 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13922   match(Set dst (AndL (SubL imm_zero src) src));
13923   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13924   effect(KILL cr);
13925   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13926 
13927   format %{ "blsiq  $dst, $src" %}
13928 
13929   ins_encode %{
13930     __ blsiq($dst$$Register, $src$$Register);
13931   %}
13932   ins_pipe(ialu_reg);
13933 %}
13934 
13935 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13936   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13937   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13938   effect(KILL cr);
13939   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13940 
13941   ins_cost(125);
13942   format %{ "blsiq  $dst, $src" %}
13943 
13944   ins_encode %{
13945     __ blsiq($dst$$Register, $src$$Address);
13946   %}
13947   ins_pipe(ialu_reg_mem);
13948 %}
13949 
13950 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13951 %{
13952   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13953   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13954   effect(KILL cr);
13955   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13956 
13957   ins_cost(125);
13958   format %{ "blsmskq $dst, $src" %}
13959 
13960   ins_encode %{
13961     __ blsmskq($dst$$Register, $src$$Address);
13962   %}
13963   ins_pipe(ialu_reg_mem);
13964 %}
13965 
13966 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13967 %{
13968   match(Set dst (XorL (AddL src minus_1) src));
13969   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13970   effect(KILL cr);
13971   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13972 
13973   format %{ "blsmskq $dst, $src" %}
13974 
13975   ins_encode %{
13976     __ blsmskq($dst$$Register, $src$$Register);
13977   %}
13978 
13979   ins_pipe(ialu_reg);
13980 %}
13981 
13982 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13983 %{
13984   match(Set dst (AndL (AddL src minus_1) src) );
13985   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13986   effect(KILL cr);
13987   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13988 
13989   format %{ "blsrq  $dst, $src" %}
13990 
13991   ins_encode %{
13992     __ blsrq($dst$$Register, $src$$Register);
13993   %}
13994 
13995   ins_pipe(ialu_reg);
13996 %}
13997 
13998 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13999 %{
14000   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
14001   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
14002   effect(KILL cr);
14003   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
14004 
14005   ins_cost(125);
14006   format %{ "blsrq  $dst, $src" %}
14007 
14008   ins_encode %{
14009     __ blsrq($dst$$Register, $src$$Address);
14010   %}
14011 
14012   ins_pipe(ialu_reg);
14013 %}
14014 
14015 // Or Instructions
14016 // Or Register with Register
14017 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14018 %{
14019   predicate(!UseAPX);
14020   match(Set dst (OrL dst src));
14021   effect(KILL cr);
14022   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14023 
14024   format %{ "orq     $dst, $src\t# long" %}
14025   ins_encode %{
14026     __ orq($dst$$Register, $src$$Register);
14027   %}
14028   ins_pipe(ialu_reg_reg);
14029 %}
14030 
14031 // Or Register with Register using New Data Destination (NDD)
14032 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14033 %{
14034   predicate(UseAPX);
14035   match(Set dst (OrL src1 src2));
14036   effect(KILL cr);
14037   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14038 
14039   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14040   ins_encode %{
14041     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14042 
14043   %}
14044   ins_pipe(ialu_reg_reg);
14045 %}
14046 
14047 // Use any_RegP to match R15 (TLS register) without spilling.
14048 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
14049   predicate(!UseAPX);
14050   match(Set dst (OrL dst (CastP2X src)));
14051   effect(KILL cr);
14052   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14053 
14054   format %{ "orq     $dst, $src\t# long" %}
14055   ins_encode %{
14056     __ orq($dst$$Register, $src$$Register);
14057   %}
14058   ins_pipe(ialu_reg_reg);
14059 %}
14060 
14061 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
14062   predicate(UseAPX);
14063   match(Set dst (OrL src1 (CastP2X src2)));
14064   effect(KILL cr);
14065   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14066 
14067   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14068   ins_encode %{
14069     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14070   %}
14071   ins_pipe(ialu_reg_reg);
14072 %}
14073 
14074 // Or Register with Immediate
14075 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14076 %{
14077   predicate(!UseAPX);
14078   match(Set dst (OrL dst src));
14079   effect(KILL cr);
14080   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14081 
14082   format %{ "orq     $dst, $src\t# long" %}
14083   ins_encode %{
14084     __ orq($dst$$Register, $src$$constant);
14085   %}
14086   ins_pipe(ialu_reg);
14087 %}
14088 
14089 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14090 %{
14091   predicate(UseAPX);
14092   match(Set dst (OrL src1 src2));
14093   effect(KILL cr);
14094   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14095 
14096   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14097   ins_encode %{
14098     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14099   %}
14100   ins_pipe(ialu_reg);
14101 %}
14102 
14103 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14104 %{
14105   predicate(UseAPX);
14106   match(Set dst (OrL src1 src2));
14107   effect(KILL cr);
14108   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14109 
14110   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14111   ins_encode %{
14112     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14113   %}
14114   ins_pipe(ialu_reg);
14115 %}
14116 
14117 // Or Memory with Immediate
14118 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14119 %{
14120   predicate(UseAPX);
14121   match(Set dst (OrL (LoadL src1) src2));
14122   effect(KILL cr);
14123   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14124 
14125   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14126   ins_encode %{
14127     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14128   %}
14129   ins_pipe(ialu_reg);
14130 %}
14131 
14132 // Or Register with Memory
14133 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14134 %{
14135   predicate(!UseAPX);
14136   match(Set dst (OrL dst (LoadL src)));
14137   effect(KILL cr);
14138   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14139 
14140   ins_cost(150);
14141   format %{ "orq     $dst, $src\t# long" %}
14142   ins_encode %{
14143     __ orq($dst$$Register, $src$$Address);
14144   %}
14145   ins_pipe(ialu_reg_mem);
14146 %}
14147 
14148 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14149 %{
14150   predicate(UseAPX);
14151   match(Set dst (OrL src1 (LoadL src2)));
14152   effect(KILL cr);
14153   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14154 
14155   ins_cost(150);
14156   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14157   ins_encode %{
14158     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14159   %}
14160   ins_pipe(ialu_reg_mem);
14161 %}
14162 
14163 // Or Memory with Register
14164 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14165 %{
14166   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14167   effect(KILL cr);
14168   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14169 
14170   ins_cost(150);
14171   format %{ "orq     $dst, $src\t# long" %}
14172   ins_encode %{
14173     __ orq($dst$$Address, $src$$Register);
14174   %}
14175   ins_pipe(ialu_mem_reg);
14176 %}
14177 
14178 // Or Memory with Immediate
14179 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14180 %{
14181   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14182   effect(KILL cr);
14183   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14184 
14185   ins_cost(125);
14186   format %{ "orq     $dst, $src\t# long" %}
14187   ins_encode %{
14188     __ orq($dst$$Address, $src$$constant);
14189   %}
14190   ins_pipe(ialu_mem_imm);
14191 %}
14192 
14193 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14194 %{
14195   // con should be a pure 64-bit power of 2 immediate
14196   // because AND/OR works well enough for 8/32-bit values.
14197   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14198 
14199   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14200   effect(KILL cr);
14201 
14202   ins_cost(125);
14203   format %{ "btsq    $dst, log2($con)\t# long" %}
14204   ins_encode %{
14205     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14206   %}
14207   ins_pipe(ialu_mem_imm);
14208 %}
14209 
14210 // Xor Instructions
14211 // Xor Register with Register
14212 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14213 %{
14214   predicate(!UseAPX);
14215   match(Set dst (XorL dst src));
14216   effect(KILL cr);
14217   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14218 
14219   format %{ "xorq    $dst, $src\t# long" %}
14220   ins_encode %{
14221     __ xorq($dst$$Register, $src$$Register);
14222   %}
14223   ins_pipe(ialu_reg_reg);
14224 %}
14225 
14226 // Xor Register with Register using New Data Destination (NDD)
14227 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14228 %{
14229   predicate(UseAPX);
14230   match(Set dst (XorL src1 src2));
14231   effect(KILL cr);
14232   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14233 
14234   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14235   ins_encode %{
14236     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14237   %}
14238   ins_pipe(ialu_reg_reg);
14239 %}
14240 
14241 // Xor Register with Immediate -1
14242 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14243 %{
14244   predicate(!UseAPX);
14245   match(Set dst (XorL dst imm));
14246 
14247   format %{ "notq   $dst" %}
14248   ins_encode %{
14249      __ notq($dst$$Register);
14250   %}
14251   ins_pipe(ialu_reg);
14252 %}
14253 
14254 instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
14255 %{
14256   predicate(UseAPX);
14257   match(Set dst (XorL src imm));
14258   flag(PD::Flag_ndd_demotable_opr1);
14259 
14260   format %{ "enotq   $dst, $src" %}
14261   ins_encode %{
14262     __ enotq($dst$$Register, $src$$Register);
14263   %}
14264   ins_pipe(ialu_reg);
14265 %}
14266 
14267 // Xor Register with Immediate
14268 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14269 %{
14270   // Strict predicate check to make selection of xorL_rReg_im1 cost agnostic if immL32 src is -1.
14271   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14272   match(Set dst (XorL dst src));
14273   effect(KILL cr);
14274   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14275 
14276   format %{ "xorq    $dst, $src\t# long" %}
14277   ins_encode %{
14278     __ xorq($dst$$Register, $src$$constant);
14279   %}
14280   ins_pipe(ialu_reg);
14281 %}
14282 
14283 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14284 %{
14285   // Strict predicate check to make selection of xorL_rReg_im1_ndd cost agnostic if immL32 src2 is -1.
14286   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14287   match(Set dst (XorL src1 src2));
14288   effect(KILL cr);
14289   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14290 
14291   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14292   ins_encode %{
14293     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14294   %}
14295   ins_pipe(ialu_reg);
14296 %}
14297 
14298 // Xor Memory with Immediate
14299 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14300 %{
14301   predicate(UseAPX);
14302   match(Set dst (XorL (LoadL src1) src2));
14303   effect(KILL cr);
14304   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14305   ins_cost(150);
14306 
14307   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14308   ins_encode %{
14309     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14310   %}
14311   ins_pipe(ialu_reg);
14312 %}
14313 
14314 // Xor Register with Memory
14315 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14316 %{
14317   predicate(!UseAPX);
14318   match(Set dst (XorL dst (LoadL src)));
14319   effect(KILL cr);
14320   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14321 
14322   ins_cost(150);
14323   format %{ "xorq    $dst, $src\t# long" %}
14324   ins_encode %{
14325     __ xorq($dst$$Register, $src$$Address);
14326   %}
14327   ins_pipe(ialu_reg_mem);
14328 %}
14329 
14330 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14331 %{
14332   predicate(UseAPX);
14333   match(Set dst (XorL src1 (LoadL src2)));
14334   effect(KILL cr);
14335   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14336 
14337   ins_cost(150);
14338   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14339   ins_encode %{
14340     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14341   %}
14342   ins_pipe(ialu_reg_mem);
14343 %}
14344 
14345 // Xor Memory with Register
14346 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14347 %{
14348   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14349   effect(KILL cr);
14350   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14351 
14352   ins_cost(150);
14353   format %{ "xorq    $dst, $src\t# long" %}
14354   ins_encode %{
14355     __ xorq($dst$$Address, $src$$Register);
14356   %}
14357   ins_pipe(ialu_mem_reg);
14358 %}
14359 
14360 // Xor Memory with Immediate
14361 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14362 %{
14363   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14364   effect(KILL cr);
14365   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14366 
14367   ins_cost(125);
14368   format %{ "xorq    $dst, $src\t# long" %}
14369   ins_encode %{
14370     __ xorq($dst$$Address, $src$$constant);
14371   %}
14372   ins_pipe(ialu_mem_imm);
14373 %}
14374 
14375 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14376 %{
14377   match(Set dst (CmpLTMask p q));
14378   effect(KILL cr);
14379 
14380   ins_cost(400);
14381   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14382             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14383             "negl    $dst" %}
14384   ins_encode %{
14385     __ cmpl($p$$Register, $q$$Register);
14386     __ setcc(Assembler::less, $dst$$Register);
14387     __ negl($dst$$Register);
14388   %}
14389   ins_pipe(pipe_slow);
14390 %}
14391 
14392 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14393 %{
14394   match(Set dst (CmpLTMask dst zero));
14395   effect(KILL cr);
14396 
14397   ins_cost(100);
14398   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14399   ins_encode %{
14400     __ sarl($dst$$Register, 31);
14401   %}
14402   ins_pipe(ialu_reg);
14403 %}
14404 
14405 /* Better to save a register than avoid a branch */
14406 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14407 %{
14408   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14409   effect(KILL cr);
14410   ins_cost(300);
14411   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14412             "jge     done\n\t"
14413             "addl    $p,$y\n"
14414             "done:   " %}
14415   ins_encode %{
14416     Register Rp = $p$$Register;
14417     Register Rq = $q$$Register;
14418     Register Ry = $y$$Register;
14419     Label done;
14420     __ subl(Rp, Rq);
14421     __ jccb(Assembler::greaterEqual, done);
14422     __ addl(Rp, Ry);
14423     __ bind(done);
14424   %}
14425   ins_pipe(pipe_cmplt);
14426 %}
14427 
14428 /* Better to save a register than avoid a branch */
14429 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14430 %{
14431   match(Set y (AndI (CmpLTMask p q) y));
14432   effect(KILL cr);
14433 
14434   ins_cost(300);
14435 
14436   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14437             "jlt     done\n\t"
14438             "xorl    $y, $y\n"
14439             "done:   " %}
14440   ins_encode %{
14441     Register Rp = $p$$Register;
14442     Register Rq = $q$$Register;
14443     Register Ry = $y$$Register;
14444     Label done;
14445     __ cmpl(Rp, Rq);
14446     __ jccb(Assembler::less, done);
14447     __ xorl(Ry, Ry);
14448     __ bind(done);
14449   %}
14450   ins_pipe(pipe_cmplt);
14451 %}
14452 
14453 
14454 //---------- FP Instructions------------------------------------------------
14455 
14456 // Really expensive, avoid
14457 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14458 %{
14459   match(Set cr (CmpF src1 src2));
14460 
14461   ins_cost(500);
14462   format %{ "ucomiss $src1, $src2\n\t"
14463             "jnp,s   exit\n\t"
14464             "pushfq\t# saw NaN, set CF\n\t"
14465             "andq    [rsp], #0xffffff2b\n\t"
14466             "popfq\n"
14467     "exit:" %}
14468   ins_encode %{
14469     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14470     emit_cmpfp_fixup(masm);
14471   %}
14472   ins_pipe(pipe_slow);
14473 %}
14474 
14475 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14476   match(Set cr (CmpF src1 src2));
14477 
14478   ins_cost(100);
14479   format %{ "ucomiss $src1, $src2" %}
14480   ins_encode %{
14481     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14482   %}
14483   ins_pipe(pipe_slow);
14484 %}
14485 
14486 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14487   match(Set cr (CmpF src1 src2));
14488 
14489   ins_cost(100);
14490   format %{ "evucomxss $src1, $src2" %}
14491   ins_encode %{
14492     __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14493   %}
14494   ins_pipe(pipe_slow);
14495 %}
14496 
14497 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14498   match(Set cr (CmpF src1 (LoadF src2)));
14499 
14500   ins_cost(100);
14501   format %{ "ucomiss $src1, $src2" %}
14502   ins_encode %{
14503     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14504   %}
14505   ins_pipe(pipe_slow);
14506 %}
14507 
14508 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14509   match(Set cr (CmpF src1 (LoadF src2)));
14510 
14511   ins_cost(100);
14512   format %{ "evucomxss $src1, $src2" %}
14513   ins_encode %{
14514     __ evucomxss($src1$$XMMRegister, $src2$$Address);
14515   %}
14516   ins_pipe(pipe_slow);
14517 %}
14518 
14519 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14520   match(Set cr (CmpF src con));
14521 
14522   ins_cost(100);
14523   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14524   ins_encode %{
14525     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14526   %}
14527   ins_pipe(pipe_slow);
14528 %}
14529 
14530 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14531   match(Set cr (CmpF src con));
14532 
14533   ins_cost(100);
14534   format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14535   ins_encode %{
14536     __ evucomxss($src$$XMMRegister, $constantaddress($con));
14537   %}
14538   ins_pipe(pipe_slow);
14539 %}
14540 
14541 // Really expensive, avoid
14542 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14543 %{
14544   match(Set cr (CmpD src1 src2));
14545 
14546   ins_cost(500);
14547   format %{ "ucomisd $src1, $src2\n\t"
14548             "jnp,s   exit\n\t"
14549             "pushfq\t# saw NaN, set CF\n\t"
14550             "andq    [rsp], #0xffffff2b\n\t"
14551             "popfq\n"
14552     "exit:" %}
14553   ins_encode %{
14554     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14555     emit_cmpfp_fixup(masm);
14556   %}
14557   ins_pipe(pipe_slow);
14558 %}
14559 
14560 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14561   match(Set cr (CmpD src1 src2));
14562 
14563   ins_cost(100);
14564   format %{ "ucomisd $src1, $src2 test" %}
14565   ins_encode %{
14566     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14567   %}
14568   ins_pipe(pipe_slow);
14569 %}
14570 
14571 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14572   match(Set cr (CmpD src1 src2));
14573 
14574   ins_cost(100);
14575   format %{ "evucomxsd $src1, $src2 test" %}
14576   ins_encode %{
14577     __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14578   %}
14579   ins_pipe(pipe_slow);
14580 %}
14581 
14582 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14583   match(Set cr (CmpD src1 (LoadD src2)));
14584 
14585   ins_cost(100);
14586   format %{ "ucomisd $src1, $src2" %}
14587   ins_encode %{
14588     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14589   %}
14590   ins_pipe(pipe_slow);
14591 %}
14592 
14593 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14594   match(Set cr (CmpD src1 (LoadD src2)));
14595 
14596   ins_cost(100);
14597   format %{ "evucomxsd $src1, $src2" %}
14598   ins_encode %{
14599     __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14600   %}
14601   ins_pipe(pipe_slow);
14602 %}
14603 
14604 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14605   match(Set cr (CmpD src con));
14606   ins_cost(100);
14607   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14608   ins_encode %{
14609     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14610   %}
14611   ins_pipe(pipe_slow);
14612 %}
14613 
14614 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14615   match(Set cr (CmpD src con));
14616 
14617   ins_cost(100);
14618   format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14619   ins_encode %{
14620     __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14621   %}
14622   ins_pipe(pipe_slow);
14623 %}
14624 
14625 // Compare into -1,0,1
14626 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14627 %{
14628   match(Set dst (CmpF3 src1 src2));
14629   effect(KILL cr);
14630 
14631   ins_cost(275);
14632   format %{ "ucomiss $src1, $src2\n\t"
14633             "movl    $dst, #-1\n\t"
14634             "jp,s    done\n\t"
14635             "jb,s    done\n\t"
14636             "setne   $dst\n\t"
14637             "movzbl  $dst, $dst\n"
14638     "done:" %}
14639   ins_encode %{
14640     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14641     emit_cmpfp3(masm, $dst$$Register);
14642   %}
14643   ins_pipe(pipe_slow);
14644 %}
14645 
// Three-way float compare (CmpF3) of a register against a float loaded from
// memory; the LoadF is folded into the ucomiss memory operand.  The -1/0/1
// result is written to dst by emit_cmpfp3 and the flags register is killed.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14666 
// Three-way float compare (CmpF3) of a register against a float constant that
// is read from the constant table.  The -1/0/1 result is written to dst by
// emit_cmpfp3 and the flags register is killed.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);
  ins_cost(275);

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const XMMRegister lhs    = $src$$XMMRegister;
    const Register    result = $dst$$Register;
    __ ucomiss(lhs, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14686 
// Three-way double compare (CmpD3) of two XMM registers, producing -1, 0 or 1
// in dst.  ucomisd sets the flags; emit_cmpfp3 then materializes the integer
// result (presumably the sequence listed in the format string - confirm
// against the helper).  The flags register is killed.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(275);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14707 
// Three-way double compare (CmpD3) of a register against a double loaded from
// memory; the LoadD is folded into the ucomisd memory operand.  The -1/0/1
// result is written to dst by emit_cmpfp3 and the flags register is killed.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14728 
// Three-way double compare (CmpD3) of a register against a double constant
// that is read from the constant table.  The -1/0/1 result is written to dst
// by emit_cmpfp3 and the flags register is killed.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);
  ins_cost(275);

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const XMMRegister lhs    = $src$$XMMRegister;
    const Register    result = $dst$$Register;
    __ ucomisd(lhs, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14748 
14749 //----------Arithmetic Conversion Instructions---------------------------------
14750 
// Float -> double conversion (ConvF2D) between XMM registers via cvtss2sd.
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    const XMMRegister wide   = $dst$$XMMRegister;
    const XMMRegister narrow = $src$$XMMRegister;
    __ cvtss2sd(wide, narrow);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14761 
// Float -> double conversion (ConvF2D) with the float operand read directly
// from memory (folded LoadF).  Only selected when UseAVX == 0.
instruct convF2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvF2D (LoadF src)));
  predicate(UseAVX == 0);

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    const XMMRegister wide = $dst$$XMMRegister;
    __ cvtss2sd(wide, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14773 
// Double -> float conversion (ConvD2F) between XMM registers via cvtsd2ss.
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    const XMMRegister narrow = $dst$$XMMRegister;
    const XMMRegister wide   = $src$$XMMRegister;
    __ cvtsd2ss(narrow, wide);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14784 
// Double -> float conversion (ConvD2F) with the double operand read directly
// from memory (folded LoadD).  Only selected when UseAVX == 0.
instruct convD2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvD2F (LoadD src)));
  predicate(UseAVX == 0);

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    const XMMRegister narrow = $dst$$XMMRegister;
    __ cvtsd2ss(narrow, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14796 
// XXX do mem variants
// Float -> int conversion (ConvF2I) used when AVX10.2 is not supported.
// Delegates to the convertF2I macro-assembler helper; the flags register is
// killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2I src));
  predicate(!VM_Version::supports_avx10_2());
  effect(KILL cr);

  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, result, value);
  %}
  ins_pipe(pipe_slow);
%}
14809 
// Float -> int conversion (ConvF2I) for CPUs reporting AVX10.2 support:
// a single evcvttss2sisl from an XMM register.  No flags operand is declared.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src) %{
  match(Set dst (ConvF2I src));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    __ evcvttss2sisl(result, value);
  %}
  ins_pipe(pipe_slow);
%}
14820 
// Float -> int conversion (ConvF2I) for CPUs reporting AVX10.2 support, with
// the float operand read directly from memory (folded LoadF).
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  match(Set dst (ConvF2I (LoadF src)));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ evcvttss2sisl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14831 
// Float -> long conversion (ConvF2L) used when AVX10.2 is not supported.
// Delegates to the convertF2I macro-assembler helper with T_LONG as the
// destination type; the flags register is killed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2L src));
  predicate(!VM_Version::supports_avx10_2());
  effect(KILL cr);

  format %{ "convert_f2l $dst, $src" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, result, value);
  %}
  ins_pipe(pipe_slow);
%}
14843 
// Float -> long conversion (ConvF2L) for CPUs reporting AVX10.2 support:
// a single evcvttss2sisq from an XMM register.  No flags operand is declared.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src) %{
  match(Set dst (ConvF2L src));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    __ evcvttss2sisq(result, value);
  %}
  ins_pipe(pipe_slow);
%}
14854 
// Float -> long conversion (ConvF2L) for CPUs reporting AVX10.2 support, with
// the float operand read directly from memory (folded LoadF).
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  match(Set dst (ConvF2L (LoadF src)));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ evcvttss2sisq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14865 
// Double -> int conversion (ConvD2I) used when AVX10.2 is not supported.
// Delegates to the convertF2I macro-assembler helper with T_DOUBLE as the
// source type; the flags register is killed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2I src));
  predicate(!VM_Version::supports_avx10_2());
  effect(KILL cr);

  format %{ "convert_d2i $dst, $src" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, result, value);
  %}
  ins_pipe(pipe_slow);
%}
14877 
// Double -> int conversion (ConvD2I) for CPUs reporting AVX10.2 support:
// a single evcvttsd2sisl from an XMM register.  No flags operand is declared.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src) %{
  match(Set dst (ConvD2I src));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    __ evcvttsd2sisl(result, value);
  %}
  ins_pipe(pipe_slow);
%}
14888 
// Double -> int conversion (ConvD2I) for CPUs reporting AVX10.2 support, with
// the double operand read directly from memory (folded LoadD).
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  match(Set dst (ConvD2I (LoadD src)));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ evcvttsd2sisl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14899 
// Double -> long conversion (ConvD2L) used when AVX10.2 is not supported.
// Delegates to the convertF2I macro-assembler helper with T_LONG/T_DOUBLE as
// destination/source types; the flags register is killed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2L src));
  predicate(!VM_Version::supports_avx10_2());
  effect(KILL cr);

  format %{ "convert_d2l $dst, $src" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, result, value);
  %}
  ins_pipe(pipe_slow);
%}
14911 
// Double -> long conversion (ConvD2L) for CPUs reporting AVX10.2 support:
// a single evcvttsd2sisq from an XMM register.  No flags operand is declared.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src) %{
  match(Set dst (ConvD2L src));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    __ evcvttsd2sisq(result, value);
  %}
  ins_pipe(pipe_slow);
%}
14922 
// Double -> long conversion (ConvD2L) for CPUs reporting AVX10.2 support, with
// the double operand read directly from memory (folded LoadD).
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  match(Set dst (ConvD2L (LoadD src)));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ evcvttsd2sisq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14933 
// RoundD: rounds a double in an XMM register to a long via the round_double
// macro-assembler helper.  dst, rtmp and rcx are all reserved as TEMPs for the
// helper and the flags register is killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);

  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP" %}
  ins_encode %{
    const Register    result   = $dst$$Register;
    const XMMRegister value    = $src$$XMMRegister;
    const Register    scratch  = $rtmp$$Register;
    const Register    scratch2 = $rcx$$Register;
    __ round_double(result, value, scratch, scratch2);
  %}
  ins_pipe(pipe_slow);
%}
14944 
// RoundF: rounds a float in an XMM register to an int via the round_float
// macro-assembler helper.  dst, rtmp and rcx are all reserved as TEMPs for the
// helper and the flags register is killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);

  format %{ "round_float $dst,$src" %}
  ins_encode %{
    const Register    result   = $dst$$Register;
    const XMMRegister value    = $src$$XMMRegister;
    const Register    scratch  = $rtmp$$Register;
    const Register    scratch2 = $rcx$$Register;
    __ round_float(result, value, scratch, scratch2);
  %}
  ins_pipe(pipe_slow);
%}
14955 
// Int -> float conversion (ConvI2F) from a general register via cvtsi2ssl.
// Selected when UseXmmI2F is off.  With AVX enabled the destination is zeroed
// with pxor before the conversion.
instruct convI2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvI2F src));
  predicate(!UseXmmI2F);

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably avoids a dependency on the old contents of
      // the destination register - confirm.
      __ pxor(result, result);
    }
    __ cvtsi2ssl(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14970 
// Int -> float conversion (ConvI2F) with the int operand read directly from
// memory (folded LoadI).  Only selected when UseAVX == 0.
instruct convI2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvI2F (LoadI src)));
  predicate(UseAVX == 0);

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2ssl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14982 
// Int -> double conversion (ConvI2D) from a general register via cvtsi2sdl.
// Selected when UseXmmI2D is off.  With AVX enabled the destination is zeroed
// with pxor before the conversion.
instruct convI2D_reg_reg(vlRegD dst, rRegI src) %{
  match(Set dst (ConvI2D src));
  predicate(!UseXmmI2D);

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably avoids a dependency on the old contents of
      // the destination register - confirm.
      __ pxor(result, result);
    }
    __ cvtsi2sdl(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14997 
// Int -> double conversion (ConvI2D) with the int operand read directly from
// memory (folded LoadI).  Only selected when UseAVX == 0.
instruct convI2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvI2D (LoadI src)));
  predicate(UseAVX == 0);

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2sdl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15009 
// Int -> float conversion (ConvI2F) selected when UseXmmI2F is on: the int is
// first moved into the destination XMM register with movdl and then converted
// in place with cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  match(Set dst (ConvI2F src));
  predicate(UseXmmI2F);

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    __ movdl(result, $src$$Register);
    __ cvtdq2ps(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15023 
// Int -> double conversion (ConvI2D) selected when UseXmmI2D is on: the int is
// first moved into the destination XMM register with movdl and then converted
// in place with cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  match(Set dst (ConvI2D src));
  predicate(UseXmmI2D);

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    __ movdl(result, $src$$Register);
    __ cvtdq2pd(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15037 
// Long -> float conversion (ConvL2F) from a general register via cvtsi2ssq.
// With AVX enabled the destination is zeroed with pxor before the conversion.
instruct convL2F_reg_reg(vlRegF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably avoids a dependency on the old contents of
      // the destination register - confirm.
      __ pxor(result, result);
    }
    __ cvtsi2ssq(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15051 
// Long -> float conversion (ConvL2F) with the long operand read directly from
// memory (folded LoadL).  Only selected when UseAVX == 0.
instruct convL2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvL2F (LoadL src)));
  predicate(UseAVX == 0);

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2ssq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15063 
// Long -> double conversion (ConvL2D) from a general register via cvtsi2sdq.
// With AVX enabled the destination is zeroed with pxor before the conversion.
instruct convL2D_reg_reg(vlRegD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably avoids a dependency on the old contents of
      // the destination register - confirm.
      __ pxor(result, result);
    }
    __ cvtsi2sdq(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15077 
// Long -> double conversion (ConvL2D) with the long operand read directly from
// memory (folded LoadL).  Only selected when UseAVX == 0.
instruct convL2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvL2D (LoadL src)));
  predicate(UseAVX == 0);

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2sdq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15089 
// Sign-extending int -> long conversion (ConvI2L) between general registers,
// emitted as movslq.
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));
  ins_cost(125);

  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    const Register wide   = $dst$$Register;
    const Register narrow = $src$$Register;
    __ movslq(wide, narrow);
  %}
  ins_pipe(ialu_reg_reg);
%}
15101 
// Zero-extend convert int to long: matches (ConvI2L src) & mask where mask is
// an immL_32bits operand, and implements it as a 32-bit register move.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  // The format string is a single line; it previously ended in a stray
  // newline/tab pair that printed an empty continuation line in the listing.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // NOTE(review): the move is skipped when both operands were assigned the
    // same register, which relies on the upper 32 bits already being zero in
    // that case - confirm.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
15115 
// Zero-extend convert int to long with the int read from memory: matches
// (ConvI2L (LoadI src)) & mask where mask is an immL_32bits operand, and
// implements it as a single 32-bit load.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  // The format string is a single line; it previously ended in a stray
  // newline/tab pair that printed an empty continuation line in the listing.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
15127 
// Matches a long AND with an immL_32bits mask and implements it as a 32-bit
// register move (see the "zero-extend long" format text).
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ movl(result, value);
  %}
  ins_pipe(ialu_reg_reg);
%}
15138 
// Long -> int conversion (ConvL2I) between general registers, emitted as a
// 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    const Register narrow = $dst$$Register;
    const Register wide   = $src$$Register;
    __ movl(narrow, wide);
  %}
  ins_pipe(ialu_reg_reg);
%}
15149 
15150 
// MoveF2I from a float stack slot into an int register: a 32-bit load from
// the rsp-relative slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15162 
// MoveI2F from an int stack slot into a float XMM register: loaded with
// movflt from the rsp-relative slot.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15174 
// MoveD2L from a double stack slot into a long register: a 64-bit load from
// the rsp-relative slot.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15186 
// MoveL2D from a long stack slot into a double XMM register, selected when
// UseXmmLoadAndClearUpper is off.  The load is issued through movdbl; the
// format text lists it as movlpd.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  match(Set dst (MoveL2D src));
  predicate(!UseXmmLoadAndClearUpper);
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15199 
// MoveL2D from a long stack slot into a double XMM register, selected when
// UseXmmLoadAndClearUpper is on.  The load is issued through movdbl; the
// format text lists it as movsd.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  match(Set dst (MoveL2D src));
  predicate(UseXmmLoadAndClearUpper);
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15212 
15213 
// MoveF2I from a float XMM register into an int stack slot: stored with
// movflt to the rsp-relative slot.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(95); // XXX

  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15225 
// MoveI2F from an int register into a float stack slot: a 32-bit store to the
// rsp-relative slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);

  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15237 
// MoveD2L from a double XMM register into a long stack slot: stored with
// movdbl to the rsp-relative slot.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The trailing tag names this rule; it previously read "MoveL2D_reg_stack",
  // which is the name of a different rule (long register -> double slot).
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15249 
// MoveL2D from a long register into a double stack slot: a 64-bit store to
// the rsp-relative slot.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);

  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15261 
// MoveF2I directly from a float XMM register into an int register via movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);

  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    const Register    bits  = $dst$$Register;
    const XMMRegister value = $src$$XMMRegister;
    __ movdl(bits, value);
  %}
  ins_pipe(pipe_slow);
%}
15272 
// MoveD2L directly from a double XMM register into a long register via the
// 64-bit movdq assembler routine.
instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  // The listing shows movq: this is the 64-bit move (movdq), whereas the
  // 32-bit rules use movdl and are listed as movd.
  format %{ "movq    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    __ movdq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
15283 
// MoveI2F directly from an int register into a float XMM register via movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);

  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    const XMMRegister value = $dst$$XMMRegister;
    const Register    bits  = $src$$Register;
    __ movdl(value, bits);
  %}
  ins_pipe(pipe_slow);
%}
15294 
// MoveL2D directly from a long register into a double XMM register via the
// 64-bit movdq assembler routine.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  // The listing shows movq: this is the 64-bit move (movdq), whereas the
  // 32-bit rules use movdl and are listed as movd.
  format %{ "movq    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15305 
15306 
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
//
// Selected for ClearArray nodes that are neither is_large() nor
// word_copy_only() when UseAVX <= 2.  cnt (rcx), base (rdi) and val (rax) are
// consumed and clobbered (USE_KILL), tmp is an XMM temporary and the flags are
// killed.  All code is generated by the clear_mem macro-assembler helper; the
// format template below is only the textual listing.
// NOTE(review): in the XMM path the listing prints (rax)-based stores and a
// literal xmm0 although the operands are base (rdi) and tmp - confirm the
// listing still matches what clear_mem emits.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two trailing booleans are presumably the is_large and
    // word_copy_only flags (both false, mirroring the predicate) - confirm
    // against the clear_mem signature.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15367 
// Small non-constant length ClearArray for non-AVX512 targets, word-copy-only
// flavour.
//
// Selected for ClearArray nodes that are not is_large() but are
// word_copy_only() when UseAVX <= 2.  cnt (rcx), base (rdi) and val (rax) are
// consumed and clobbered (USE_KILL), tmp is an XMM temporary and the flags are
// killed.  The format template has no UseFastStosb (rep stosb) branch.  All
// code is generated by the clear_mem macro-assembler helper; the template is
// only the textual listing.
// NOTE(review): in the XMM path the listing prints (rax)-based stores and a
// literal xmm0 although the operands are base (rdi) and tmp - confirm the
// listing still matches what clear_mem emits.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two trailing booleans are presumably the is_large and
    // word_copy_only flags (false / true, mirroring the predicate) - confirm
    // against the clear_mem signature.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15423 
// Small non-constant length ClearArray for AVX512 targets.
//
// Selected for ClearArray nodes that are neither is_large() nor
// word_copy_only() when UseAVX > 2.  cnt (rcx), base (rdi) and val (rax) are
// consumed and clobbered (USE_KILL); tmp (legRegD) and the opmask register
// ktmp are temporaries and the flags are killed.  All code is generated by the
// clear_mem macro-assembler helper, which additionally receives ktmp; the
// format template below is only the textual listing.
// NOTE(review): the listing starts with "xorq rax, rax" and shows a vpxor of
// ymm0 although val (rax) is an input operand and tmp is the declared XMM
// temporary - confirm the listing still matches what clear_mem emits.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two booleans are presumably the is_large and word_copy_only flags
    // (both false, mirroring the predicate) - confirm against the clear_mem
    // signature.  The opmask temporary is passed as the last argument.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15484 
// Small non-constant length ClearArray for AVX512 targets, word-copy-only
// flavour.
//
// Selected for ClearArray nodes that are not is_large() but are
// word_copy_only() when UseAVX > 2.  cnt (rcx), base (rdi) and val (rax) are
// consumed and clobbered (USE_KILL); tmp (legRegD) and the opmask register
// ktmp are temporaries and the flags are killed.  All code is generated by the
// clear_mem macro-assembler helper, which additionally receives ktmp; the
// format template below is only the textual listing.
// NOTE(review): the listing starts with "xorq rax, rax", shows a vpxor of
// ymm0 and still has a UseFastStosb (rep stosb) branch although val (rax) is
// an input operand and this rule is the word-copy-only one - confirm the
// listing still matches what clear_mem emits.
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The two booleans are presumably the is_large and word_copy_only flags
    // (false / true, mirroring the predicate) - confirm against the clear_mem
    // signature.  The opmask temporary is passed as the last argument.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15544 
// Large non-constant length ClearArray for non-AVX512 targets.
//
// Selected for ClearArray nodes that are is_large() and not word_copy_only()
// when UseAVX <= 2.  cnt (rcx), base (rdi) and val (rax) are consumed and
// clobbered (USE_KILL), tmp is an XMM temporary and the flags are killed.
// Unlike the small variants, the listing has no InitArrayShortSize short-loop
// prologue.  All code is generated by the clear_mem macro-assembler helper;
// the format template below is only the textual listing.
// NOTE(review): in the XMM path the listing prints (rax)-based stores and a
// literal xmm0 although the operands are base (rdi) and tmp - confirm the
// listing still matches what clear_mem emits.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The two trailing booleans are presumably the is_large and
    // word_copy_only flags (true / false, mirroring the predicate) - confirm
    // against the clear_mem signature.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15594 
// Selected for ClearArray when the node reports is_large() and
// word_copy_only(), and UseAVX <= 2. cnt, base and val are pinned to rcx, rdi
// and rax and are all USE_KILL; tmp is an XMM TEMP and the flags are killed.
// The listing text has no UseFastStosb branch: it prints either the XMM loop
// (UseXMMForObjInit) or "rep stosq".
// NOTE(review): the XMM listing stores through (rax) and uses xmm0 in the short
// loop, while the operands name rdi as base and $tmp as scratch -- the listing
// may not mirror what clear_mem emits; confirm against clear_mem.
15595 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15596                                   Universe dummy, rFlagsReg cr)
15597 %{
15598   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15599   match(Set dummy (ClearArray (Binary cnt base) val));
15600   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15601
15602   format %{ $$template
15603     if (UseXMMForObjInit) {
15604        $$emit$$"movdq   $tmp, $val\n\t"
15605        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15606        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15607        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15608        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15609        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15610        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15611        $$emit$$"add     0x40,rax\n\t"
15612        $$emit$$"# L_zero_64_bytes:\n\t"
15613        $$emit$$"sub     0x8,rcx\n\t"
15614        $$emit$$"jge     L_loop\n\t"
15615        $$emit$$"add     0x4,rcx\n\t"
15616        $$emit$$"jl      L_tail\n\t"
15617        $$emit$$"vmovdqu $tmp,(rax)\n\t"
15618        $$emit$$"add     0x20,rax\n\t"
15619        $$emit$$"sub     0x4,rcx\n\t"
15620        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15621        $$emit$$"add     0x4,rcx\n\t"
15622        $$emit$$"jle     L_end\n\t"
15623        $$emit$$"dec     rcx\n\t"
15624        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15625        $$emit$$"vmovq   xmm0,(rax)\n\t"
15626        $$emit$$"add     0x8,rax\n\t"
15627        $$emit$$"dec     rcx\n\t"
15628        $$emit$$"jge     L_sloop\n\t"
15629        $$emit$$"# L_end:\n\t"
15630     } else {
15631        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15632     }
15633   %}
15634   ins_encode %{
          // Trailing flags are (true, true); presumably they mirror is_large()
          // and word_copy_only() from the predicate -- TODO confirm against
          // clear_mem's declaration.
15635     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15636                  $tmp$$XMMRegister, true, true);
15637   %}
15638   ins_pipe(pipe_slow);
15639 %}
15640 
15641 // Large non-constant length ClearArray for AVX512 targets.
// Selected for ClearArray when the node reports is_large(), is not
// word_copy_only(), and UseAVX > 2. Same fixed registers as the UseAVX <= 2
// form (cnt=rcx, base=rdi, val=rax, all USE_KILL), but the vector scratch is a
// legRegD and an opmask register ktmp is reserved as an extra TEMP and handed
// to clear_mem.
// NOTE(review): the listing prints "xorq rax, rax" and "vpxor ymm0,ymm0,ymm0"
// although rax is declared here as the USE_KILL val input and the scratch is
// $tmp -- the listing text looks stale relative to the operands; confirm
// against clear_mem before relying on it.
15642 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15643                              Universe dummy, rFlagsReg cr)
15644 %{
15645   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15646   match(Set dummy (ClearArray (Binary cnt base) val));
15647   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15648
15649   format %{ $$template
15650     if (UseFastStosb) {
15651        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15652        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15653        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15654     } else if (UseXMMForObjInit) {
15655        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15656        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15657        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15658        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15659        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15660        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15661        $$emit$$"add     0x40,rax\n\t"
15662        $$emit$$"# L_zero_64_bytes:\n\t"
15663        $$emit$$"sub     0x8,rcx\n\t"
15664        $$emit$$"jge     L_loop\n\t"
15665        $$emit$$"add     0x4,rcx\n\t"
15666        $$emit$$"jl      L_tail\n\t"
15667        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15668        $$emit$$"add     0x20,rax\n\t"
15669        $$emit$$"sub     0x4,rcx\n\t"
15670        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15671        $$emit$$"add     0x4,rcx\n\t"
15672        $$emit$$"jle     L_end\n\t"
15673        $$emit$$"dec     rcx\n\t"
15674        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15675        $$emit$$"vmovq   xmm0,(rax)\n\t"
15676        $$emit$$"add     0x8,rax\n\t"
15677        $$emit$$"dec     rcx\n\t"
15678        $$emit$$"jge     L_sloop\n\t"
15679        $$emit$$"# L_end:\n\t"
15680     } else {
15681        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15682        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15683     }
15684   %}
15685   ins_encode %{
          // Flags are (true, false), followed by the opmask temp; presumably the
          // booleans mirror is_large() and word_copy_only() -- TODO confirm
          // against clear_mem's declaration.
15686     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15687                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15688   %}
15689   ins_pipe(pipe_slow);
15690 %}
15691 
// Selected for ClearArray when the node reports is_large() and
// word_copy_only(), and UseAVX > 2. cnt=rcx, base=rdi and val=rax are all
// USE_KILL; tmp (legRegD) and ktmp (opmask) are TEMPs; flags are killed.
// NOTE(review): the listing keeps a UseFastStosb branch, whereas the
// UseAVX <= 2 word-copy form (rep_stos_large_word_copy) has none, and it
// prints "xorq rax, rax" / "vpxor ymm0" although rax is the val input here.
// The listing text may be stale; confirm against clear_mem.
15692 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15693                                        Universe dummy, rFlagsReg cr)
15694 %{
15695   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15696   match(Set dummy (ClearArray (Binary cnt base) val));
15697   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15698
15699   format %{ $$template
15700     if (UseFastStosb) {
15701        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15702        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15703        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15704     } else if (UseXMMForObjInit) {
15705        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15706        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15707        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15708        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15709        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15710        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15711        $$emit$$"add     0x40,rax\n\t"
15712        $$emit$$"# L_zero_64_bytes:\n\t"
15713        $$emit$$"sub     0x8,rcx\n\t"
15714        $$emit$$"jge     L_loop\n\t"
15715        $$emit$$"add     0x4,rcx\n\t"
15716        $$emit$$"jl      L_tail\n\t"
15717        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15718        $$emit$$"add     0x20,rax\n\t"
15719        $$emit$$"sub     0x4,rcx\n\t"
15720        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15721        $$emit$$"add     0x4,rcx\n\t"
15722        $$emit$$"jle     L_end\n\t"
15723        $$emit$$"dec     rcx\n\t"
15724        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15725        $$emit$$"vmovq   xmm0,(rax)\n\t"
15726        $$emit$$"add     0x8,rax\n\t"
15727        $$emit$$"dec     rcx\n\t"
15728        $$emit$$"jge     L_sloop\n\t"
15729        $$emit$$"# L_end:\n\t"
15730     } else {
15731        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15732        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15733     }
15734   %}
15735   ins_encode %{
          // Flags are (true, true), followed by the opmask temp; presumably the
          // booleans mirror is_large() and word_copy_only() -- TODO confirm
          // against clear_mem's declaration.
15736     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15737                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15738   %}
15739   ins_pipe(pipe_slow);
15740 %}
15741 
15742 // Small constant length ClearArray for AVX512 targets.
// ClearArray whose count is an immediate (immL): selected when the node is
// neither is_large() nor word_copy_only(), MaxVectorSize >= 32 and AVX512VL is
// supported. base may be any pointer register and is not listed in effect();
// val (rax) is USE_KILL, tmp and ktmp are TEMPs, flags are killed. The count is
// passed to clear_mem as a compile-time constant.
15743 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15744 %{
15745   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15746             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15747   match(Set dummy (ClearArray (Binary cnt base) val));
15748   ins_cost(100);
15749   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
15750   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15751   ins_encode %{
15752     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15753   %}
15754   ins_pipe(pipe_slow);
15755 %}
15756 
// StrComp with encoding LL, used when AVX512VL+BW is not available.
// Inputs are pinned (str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx) and all four are
// USE_KILL; the result is produced in rax. No opmask register is reserved:
// knoreg is passed to string_compare in its place.
15757 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15758                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15759 %{
15760   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15761   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15762   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15763
15764   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15765   ins_encode %{
15766     __ string_compare($str1$$Register, $str2$$Register,
15767                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15768                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15769   %}
15770   ins_pipe( pipe_slow );
15771 %}
15772 
// StrComp with encoding LL, used when AVX512VL+BW is available.
// Same register assignment as string_compareL, plus an opmask TEMP (ktmp) that
// is forwarded to string_compare instead of knoreg.
15773 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15774                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15775 %{
15776   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15777   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15778   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15779
15780   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15781   ins_encode %{
15782     __ string_compare($str1$$Register, $str2$$Register,
15783                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15784                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15785   %}
15786   ins_pipe( pipe_slow );
15787 %}
15788 
// StrComp with encoding UU, used when AVX512VL+BW is not available.
// Inputs are pinned (str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx) and all four are
// USE_KILL; the result is produced in rax. knoreg is passed to string_compare
// because no opmask register is reserved.
15789 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15790                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15791 %{
15792   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15793   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15794   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15795
15796   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15797   ins_encode %{
15798     __ string_compare($str1$$Register, $str2$$Register,
15799                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15800                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15801   %}
15802   ins_pipe( pipe_slow );
15803 %}
15804 
// StrComp with encoding UU, used when AVX512VL+BW is available.
// Same register assignment as string_compareU, plus an opmask TEMP (ktmp) that
// is forwarded to string_compare instead of knoreg.
15805 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15806                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15807 %{
15808   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15809   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15810   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15811
15812   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15813   ins_encode %{
15814     __ string_compare($str1$$Register, $str2$$Register,
15815                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15816                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15817   %}
15818   ins_pipe( pipe_slow );
15819 %}
15820 
// StrComp with encoding LU, used when AVX512VL+BW is not available.
// Inputs are pinned (str1=rdi, cnt1=rcx, str2=rsi, cnt2=rdx) and all four are
// USE_KILL; the result is produced in rax. knoreg is passed to string_compare
// because no opmask register is reserved.
15821 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15822                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15823 %{
15824   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15825   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15826   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15827
15828   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15829   ins_encode %{
15830     __ string_compare($str1$$Register, $str2$$Register,
15831                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15832                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15833   %}
15834   ins_pipe( pipe_slow );
15835 %}
15836 
// StrComp with encoding LU, used when AVX512VL+BW is available.
// Same register assignment as string_compareLU, plus an opmask TEMP (ktmp)
// that is forwarded to string_compare instead of knoreg.
15837 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15838                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15839 %{
15840   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15841   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15842   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15843
15844   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15845   ins_encode %{
15846     __ string_compare($str1$$Register, $str2$$Register,
15847                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15848                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15849   %}
15850   ins_pipe( pipe_slow );
15851 %}
15852 
// StrComp with encoding UL, used when AVX512VL+BW is not available.
// Unlike the LL/UU/LU forms, the register assignment is mirrored here:
// str1=rsi, cnt1=rdx, str2=rdi, cnt2=rcx. All four inputs are USE_KILL and the
// result is produced in rax; knoreg stands in for the opmask register.
15853 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15854                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15855 %{
15856   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15857   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15858   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15859
15860   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15861   ins_encode %{
          // The second string and its count are deliberately passed first, so
          // string_compare still sees rdi/rcx in its leading positions.
15862     __ string_compare($str2$$Register, $str1$$Register,
15863                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15864                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15865   %}
15866   ins_pipe( pipe_slow );
15867 %}
15868 
// StrComp with encoding UL, used when AVX512VL+BW is available.
// Register assignment is mirrored as in string_compareUL (str1=rsi, cnt1=rdx,
// str2=rdi, cnt2=rcx); an opmask TEMP (ktmp) is reserved and forwarded to
// string_compare.
15869 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15870                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15871 %{
15872   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15873   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15874   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15875
15876   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15877   ins_encode %{
          // The second string and its count are deliberately passed first, so
          // string_compare still sees rdi/rcx in its leading positions.
15878     __ string_compare($str2$$Register, $str1$$Register,
15879                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15880                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15881   %}
15882   ins_pipe( pipe_slow );
15883 %}
15884 
15885 // fast search of substring with known size.
// StrIndexOf with encoding LL where the second count is an immediate
// (immI int_cnt2). Requires UseSSE42Intrinsics. str1=rdi, cnt1=rdx and str2=rsi
// are USE_KILL; rax (cnt2) and rcx (tmp) are not matched inputs, they are only
// killed and handed to the assembler routine as registers. Result is in rbx.
15886 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15887                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15888 %{
15889   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15890   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15891   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15892
15893   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15894   ins_encode %{
          // The constant count picks the routine at code-emission time:
          // 16 or more elements use string_indexofC8, fewer use string_indexof.
15895     int icnt2 = (int)$int_cnt2$$constant;
15896     if (icnt2 >= 16) {
15897       // IndexOf for constant substrings with size >= 16 elements
15898       // which don't need to be loaded through stack.
15899       __ string_indexofC8($str1$$Register, $str2$$Register,
15900                           $cnt1$$Register, $cnt2$$Register,
15901                           icnt2, $result$$Register,
15902                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15903     } else {
15904       // Small strings are loaded through stack if they cross page boundary.
15905       __ string_indexof($str1$$Register, $str2$$Register,
15906                         $cnt1$$Register, $cnt2$$Register,
15907                         icnt2, $result$$Register,
15908                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15909     }
15910   %}
15911   ins_pipe( pipe_slow );
15912 %}
15913 
15914 // fast search of substring with known size.
// StrIndexOf with encoding UU where the second count is an immediate
// (immI int_cnt2). Requires UseSSE42Intrinsics. str1=rdi, cnt1=rdx and str2=rsi
// are USE_KILL; rax (cnt2) and rcx (tmp) are not matched inputs, they are only
// killed and handed to the assembler routine as registers. Result is in rbx.
15915 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15916                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15917 %{
15918   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15919   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15920   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15921
15922   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15923   ins_encode %{
          // The constant count picks the routine at code-emission time:
          // 8 or more elements use string_indexofC8, fewer use string_indexof.
15924     int icnt2 = (int)$int_cnt2$$constant;
15925     if (icnt2 >= 8) {
15926       // IndexOf for constant substrings with size >= 8 elements
15927       // which don't need to be loaded through stack.
15928       __ string_indexofC8($str1$$Register, $str2$$Register,
15929                           $cnt1$$Register, $cnt2$$Register,
15930                           icnt2, $result$$Register,
15931                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15932     } else {
15933       // Small strings are loaded through stack if they cross page boundary.
15934       __ string_indexof($str1$$Register, $str2$$Register,
15935                         $cnt1$$Register, $cnt2$$Register,
15936                         icnt2, $result$$Register,
15937                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15938     }
15939   %}
15940   ins_pipe( pipe_slow );
15941 %}
15942 
15943 // fast search of substring with known size.
// StrIndexOf with encoding UL where the second count is an immediate
// (immI int_cnt2). Requires UseSSE42Intrinsics. str1=rdi, cnt1=rdx and str2=rsi
// are USE_KILL; rax (cnt2) and rcx (tmp) are not matched inputs, they are only
// killed and handed to the assembler routine as registers. Result is in rbx.
15944 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15945                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15946 %{
15947   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15948   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15949   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15950
15951   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15952   ins_encode %{
          // The constant count picks the routine at code-emission time:
          // 8 or more elements use string_indexofC8, fewer use string_indexof.
15953     int icnt2 = (int)$int_cnt2$$constant;
15954     if (icnt2 >= 8) {
15955       // IndexOf for constant substrings with size >= 8 elements
15956       // which don't need to be loaded through stack.
15957       __ string_indexofC8($str1$$Register, $str2$$Register,
15958                           $cnt1$$Register, $cnt2$$Register,
15959                           icnt2, $result$$Register,
15960                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15961     } else {
15962       // Small strings are loaded through stack if they cross page boundary.
15963       __ string_indexof($str1$$Register, $str2$$Register,
15964                         $cnt1$$Register, $cnt2$$Register,
15965                         icnt2, $result$$Register,
15966                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15967     }
15968   %}
15969   ins_pipe( pipe_slow );
15970 %}
15971 
// StrIndexOf with encoding LL where both counts are in registers.
// Requires UseSSE42Intrinsics. str1=rdi, cnt1=rdx, str2=rsi and cnt2=rax are
// all USE_KILL; rcx is killed as scratch; the result is produced in rbx.
15972 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15973                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15974 %{
15975   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15976   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15977   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15978
15979   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15980   ins_encode %{
          // (-1) fills the slot where the *_con* forms pass the immediate count;
          // presumably it marks the count as not known at compile time --
          // TODO confirm against string_indexof.
15981     __ string_indexof($str1$$Register, $str2$$Register,
15982                       $cnt1$$Register, $cnt2$$Register,
15983                       (-1), $result$$Register,
15984                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15985   %}
15986   ins_pipe( pipe_slow );
15987 %}
15988 
// StrIndexOf with encoding UU where both counts are in registers.
// Requires UseSSE42Intrinsics. str1=rdi, cnt1=rdx, str2=rsi and cnt2=rax are
// all USE_KILL; rcx is killed as scratch; the result is produced in rbx.
15989 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15990                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15991 %{
15992   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15993   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15994   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15995
15996   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15997   ins_encode %{
          // (-1) fills the slot where the *_con* forms pass the immediate count;
          // presumably it marks the count as not known at compile time --
          // TODO confirm against string_indexof.
15998     __ string_indexof($str1$$Register, $str2$$Register,
15999                       $cnt1$$Register, $cnt2$$Register,
16000                       (-1), $result$$Register,
16001                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
16002   %}
16003   ins_pipe( pipe_slow );
16004 %}
16005 
// StrIndexOf with encoding UL where both counts are in registers.
// Requires UseSSE42Intrinsics. str1=rdi, cnt1=rdx, str2=rsi and cnt2=rax are
// all USE_KILL; rcx is killed as scratch; the result is produced in rbx.
16006 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
16007                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
16008 %{
16009   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
16010   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16011   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
16012
16013   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
16014   ins_encode %{
          // (-1) fills the slot where the *_con* forms pass the immediate count;
          // presumably it marks the count as not known at compile time --
          // TODO confirm against string_indexof.
16015     __ string_indexof($str1$$Register, $str2$$Register,
16016                       $cnt1$$Register, $cnt2$$Register,
16017                       (-1), $result$$Register,
16018                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
16019   %}
16020   ins_pipe( pipe_slow );
16021 %}
16022 
// StrIndexOfChar with encoding U. Requires UseSSE42Intrinsics.
// str1=rdi, cnt1=rdx and ch=rax are USE_KILL; three vector TEMPs and rcx (TEMP)
// are reserved as scratch; the result is produced in rbx.
16023 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16024                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16025 %{
16026   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
16027   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16028   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16029   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
16030   ins_encode %{
16031     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16032                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16033   %}
16034   ins_pipe( pipe_slow );
16035 %}
16036 
// StrIndexOfChar with encoding L. Requires UseSSE42Intrinsics.
// Operands and effects are the same as string_indexof_char; only the predicate
// encoding and the assembler routine (stringL_indexof_char) differ.
16037 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16038                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16039 %{
16040   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
16041   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16042   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16043   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
16044   ins_encode %{
16045     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16046                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16047   %}
16048   ins_pipe( pipe_slow );
16049 %}
16050 
16051 // fast string equals
// StrEquals when AVX512VL+BW is not available. str1=rdi, str2=rsi and cnt=rcx
// are USE_KILL; rbx is killed as scratch; two vector TEMPs are reserved; the
// result is produced in rax. Implemented by arrays_equals with its first
// argument false and knoreg in place of an opmask register.
16052 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16053                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16054 %{
16055   predicate(!VM_Version::supports_avx512vlbw());
16056   match(Set result (StrEquals (Binary str1 str2) cnt));
16057   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16058
16059   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16060   ins_encode %{
16061     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16062                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16063                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16064   %}
16065   ins_pipe( pipe_slow );
16066 %}
16067 
// StrEquals when AVX512VL+BW is available. Same operands as string_equals plus
// an opmask TEMP (ktmp), which is forwarded to arrays_equals instead of knoreg.
16068 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16069                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
16070 %{
16071   predicate(VM_Version::supports_avx512vlbw());
16072   match(Set result (StrEquals (Binary str1 str2) cnt));
16073   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16074
16075   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
16076   ins_encode %{
16077     __ arrays_equals(false, $str1$$Register, $str2$$Register,
16078                      $cnt$$Register, $result$$Register, $tmp3$$Register,
16079                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16080   %}
16081   ins_pipe( pipe_slow );
16082 %}
16083 
16084 // fast array equals
// AryEq with encoding LL when AVX512VL+BW is not available. ary1=rdi and
// ary2=rsi are USE_KILL; rcx (tmp3) and rbx (tmp4) are killed as scratch; the
// result is produced in rax. Calls arrays_equals with its first argument true,
// the char flag false, and knoreg in place of an opmask register.
16085 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16086                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16087 %{
16088   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16089   match(Set result (AryEq ary1 ary2));
16090   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16091
16092   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16093   ins_encode %{
          // tmp3 (rcx) occupies the argument slot that string_equals fills with
          // its cnt operand; here it is a killed scratch register, not an input.
16094     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16095                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16096                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16097   %}
16098   ins_pipe( pipe_slow );
16099 %}
16100 
// AryEq with encoding LL when AVX512VL+BW is available. Same operands as
// array_equalsB plus an opmask TEMP (ktmp), which is forwarded to
// arrays_equals instead of knoreg.
16101 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16102                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16103 %{
16104   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16105   match(Set result (AryEq ary1 ary2));
16106   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16107
16108   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16109   ins_encode %{
          // tmp3 (rcx) occupies the argument slot that string_equals fills with
          // its cnt operand; here it is a killed scratch register, not an input.
16110     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16111                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16112                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16113   %}
16114   ins_pipe( pipe_slow );
16115 %}
16116 
// AryEq with encoding UU when AVX512VL+BW is not available. ary1=rdi and
// ary2=rsi are USE_KILL; rcx (tmp3) and rbx (tmp4) are killed as scratch; the
// result is produced in rax. Calls arrays_equals with its first argument true,
// the char flag true, and knoreg in place of an opmask register.
16117 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16118                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16119 %{
16120   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16121   match(Set result (AryEq ary1 ary2));
16122   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16123
16124   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16125   ins_encode %{
          // tmp3 (rcx) occupies the argument slot that string_equals fills with
          // its cnt operand; here it is a killed scratch register, not an input.
16126     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16127                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16128                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
16129   %}
16130   ins_pipe( pipe_slow );
16131 %}
16132 
// AryEq with encoding UU when AVX512VL+BW is available. Same operands as
// array_equalsC plus an opmask TEMP (ktmp), which is forwarded to
// arrays_equals instead of knoreg.
16133 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16134                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16135 %{
16136   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16137   match(Set result (AryEq ary1 ary2));
16138   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16139
16140   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16141   ins_encode %{
          // tmp3 (rcx) occupies the argument slot that string_equals fills with
          // its cnt operand; here it is a killed scratch register, not an input.
16142     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16143                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
16144                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16145   %}
16146   ins_pipe( pipe_slow );
16147 %}
16148 
// VectorizedHashCode, available when UseAVX >= 2. result (rbx) appears on both
// sides of the match rule, so it is an input as well as the output. ary1=rdi
// and cnt1=rdx are USE_KILL; basic_type is an immU8 that is only read and is
// cast to BasicType for the assembler routine. Thirteen vector TEMPs and three
// general-purpose TEMPs are reserved as scratch.
16149 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16150                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16151                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16152                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16153                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16154 %{
16155   predicate(UseAVX >= 2);
16156   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16157   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16158          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16159          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16160          USE basic_type, KILL cr);
16161
16162   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
16163   ins_encode %{
16164     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16165                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16166                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16167                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16168                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16169                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16170                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16171   %}
16172   ins_pipe( pipe_slow );
16173 %}
16174 
// CountPositives on (ary1, len), used when AVX512VL+BW or BMI2 is missing.
// ary1=rsi and len=rcx are USE_KILL; rbx is killed as scratch; two vector TEMPs
// are reserved; the result is produced in rax. No opmask registers are
// reserved, so knoreg is passed for both mask arguments of count_positives.
// (The operand list previously ended in a stray trailing comma; removed so the
// declaration matches every other instruct in this file.)
16175 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
16176                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16177 %{
16178   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16179   match(Set result (CountPositives ary1 len));
16180   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16181
16182   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
16183   ins_encode %{
16184     __ count_positives($ary1$$Register, $len$$Register,
16185                        $result$$Register, $tmp3$$Register,
16186                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16187   %}
16188   ins_pipe( pipe_slow );
16189 %}
16190 
// CountPositives intrinsic, EVEX variant: selected when the CPU supports both
// AVX512VL/BW and BMI2 (see predicate). Two opmask temporaries (ktmp1, ktmp2)
// are handed to the count_positives assembler routine.
// ary1 and len are consumed (USE_KILL); tmp3 and the flags are clobbered.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
16206 
// Fast char[] -> byte[] compression (StrCompressedCopy).
// Variant without opmask registers: chosen when the CPU lacks AVX512VL/BW or
// BMI2, so knoreg is passed for both mask arguments.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                         legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                         rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    const Register from  = $src$$Register;
    const Register to    = $dst$$Register;
    const Register count = $len$$Register;
    __ char_array_compress(from, to, count,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                           $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                           $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
16224 
// char[] -> byte[] compression (StrCompressedCopy), EVEX variant: chosen when
// the CPU supports both AVX512VL/BW and BMI2. Two opmask temporaries are
// passed on to char_array_compress.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                              legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                              kReg ktmp1, kReg ktmp2,
                              rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    const Register from  = $src$$Register;
    const Register to    = $dst$$Register;
    const Register count = $len$$Register;
    __ char_array_compress(from, to, count,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                           $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                           $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
// Fast byte[] -> char[] inflation (StrInflatedCopy).
// Variant without an opmask register: chosen when the CPU lacks AVX512VL/BW
// or BMI2, so knoreg is passed as the mask argument. The rule produces no
// value of its own; it is matched as a Set of the Universe dummy operand.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    const Register from  = $src$$Register;
    const Register to    = $dst$$Register;
    const Register count = $len$$Register;
    __ byte_array_inflate(from, to, count,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
16255 
// byte[] -> char[] inflation (StrInflatedCopy), EVEX variant: chosen when the
// CPU supports both AVX512VL/BW and BMI2. One opmask temporary is passed on
// to byte_array_inflate.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    const Register from  = $src$$Register;
    const Register to    = $dst$$Register;
    const Register count = $len$$Register;
    __ byte_array_inflate(from, to, count,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
16269 
// Encode char[] to byte[] in ISO_8859_1.
// Selected for EncodeISOArray nodes that are not flagged as ASCII; the final
// 'false' argument tells the encode_iso_array assembler routine so.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    const bool ascii_only = false;
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                        $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                        $tmp5$$Register, $result$$Register, ascii_only);
  %}
  ins_pipe(pipe_slow);
%}
16286 
// Encode char[] to byte[] in ASCII.
// Selected for EncodeISOArray nodes flagged as ASCII; shares the
// encode_iso_array assembler routine, passing 'true' as its last argument.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    const bool ascii_only = true;
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                        $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                        $tmp5$$Register, $result$$Register, ascii_only);
  %}
  ins_pipe(pipe_slow);
%}
16303 
16304 //----------Overflow Math Instructions-----------------------------------------
16305 
// Int add overflow check, register + register. The flags set by addl are the
// result (DEF cr); op1 is read and then overwritten by the sum (USE_KILL).
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register acc    = $op1$$Register;  // destroyed by the add
    const Register addend = $op2$$Register;
    __ addl(acc, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16318 
// Int add overflow check, register + immediate. Flags are the result; op1 is
// read and then overwritten (USE_KILL).
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}
  ins_encode %{ __ addl($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg);
%}
16331 
// Long add overflow check, register + register. Flags are the result; op1 is
// read and then overwritten by the sum (USE_KILL).
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register acc    = $op1$$Register;  // destroyed by the add
    const Register addend = $op2$$Register;
    __ addq(acc, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16343 
// Long add overflow check, register + 32-bit immediate (immL32). Flags are the
// result; op1 is read and then overwritten (USE_KILL).
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{ __ addq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg);
%}
16355 
// Int subtract overflow check, register - register. Implemented with cmpl, so
// only the flags are written and neither operand is modified.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register minuend    = $op1$$Register;
    const Register subtrahend = $op2$$Register;
    __ cmpl(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16366 
// Int subtract overflow check, register - immediate. Implemented with cmpl, so
// only the flags are written.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg);
%}
16377 
// Long subtract overflow check, register - register. Implemented with cmpq, so
// only the flags are written and neither operand is modified.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register minuend    = $op1$$Register;
    const Register subtrahend = $op2$$Register;
    __ cmpq(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16388 
// Long subtract overflow check, register - 32-bit immediate (immL32).
// Implemented with cmpq, so only the flags are written.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg);
%}
16399 
// Int negate overflow check: matches OverflowSubI with a constant-zero first
// operand and emits negl. op2 is read and then overwritten (USE_KILL).
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2) %{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl    $op2\t# overflow check int" %}
  ins_encode %{
    const Register value = $op2$$Register;  // negated in place
    __ negl(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
16411 
// Long negate overflow check: matches OverflowSubL with a constant-zero first
// operand and emits negq. op2 is read and then overwritten (USE_KILL).
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) %{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq    $op2\t# overflow check long" %}
  ins_encode %{
    const Register value = $op2$$Register;  // negated in place
    __ negq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
16423 
// Int multiply overflow check, register * register. Flags are the result; op1
// is read and then overwritten by the product (USE_KILL).
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register acc    = $op1$$Register;  // destroyed by the multiply
    const Register factor = $op2$$Register;
    __ imull(acc, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16435 
// Int multiply overflow check, register * immediate. Uses the three-operand
// imull form writing into a TEMP register, so op1 is left intact.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register scratch = $tmp$$Register;  // receives the discarded product
    __ imull(scratch, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16447 
// Long multiply overflow check, register * register. Flags are the result; op1
// is read and then overwritten by the product (USE_KILL).
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register acc    = $op1$$Register;  // destroyed by the multiply
    const Register factor = $op2$$Register;
    __ imulq(acc, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16459 
// Long multiply overflow check, register * 32-bit immediate (immL32). Uses the
// three-operand imulq form writing into a TEMP register, so op1 is left intact.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register scratch = $tmp$$Register;  // receives the discarded product
    __ imulq(scratch, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16471 
16472 
16473 //----------Control Flow Instructions------------------------------------------
16474 // Signed compare Instructions
16475 
16476 // XXX more variants!!
// Signed int compare, register vs register. The effect list is spelled out
// explicitly (DEF cr, USE op1, USE op2).
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16488 
// Signed int compare, register vs immediate.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16499 
// Signed int compare, register vs int loaded from memory (the LoadI is folded
// into the cmpl memory operand).
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16511 
// Int compare against constant zero, done as testl of the register with itself.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16522 
// (src & con) compared against zero, folded into a single testl with an
// immediate mask.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  ins_encode %{ __ testl($src$$Register, $con$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16533 
// (src1 & src2) compared against zero, folded into a single register/register
// testl.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero) %{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl   $src1, $src2" %}
  ins_encode %{
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ testl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16544 
// (src & int loaded from memory) compared against zero, folded into a testl
// with a memory operand.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  ins_encode %{ __ testl($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16555 
16556 // Unsigned compare Instructions; really, same as signed except they
16557 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare, register vs register; result lands in rFlagsRegU.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16568 
// Unsigned int compare, register vs immediate; result lands in rFlagsRegU.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16579 
// Unsigned int compare, register vs int loaded from memory (LoadI folded into
// the cmpl memory operand); result lands in rFlagsRegU.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16591 
// Unsigned int compare against constant zero, done as testl of the register
// with itself.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpU src zero));

  format %{ "testl   $src, $src\t# unsigned" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16602 
// Pointer compare, register vs register; 64-bit cmpq with the result in
// rFlagsRegU.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16613 
// Pointer compare, register vs pointer loaded from memory. Only applies when
// the folded LoadP carries no barrier data (barrier_data() == 0).
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  ins_cost(500); // XXX
  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16626 
// XXX this is generalized by compP_rReg_mem???
// Raw-pointer compare against a pointer loaded from memory (used in the
// out-of-heap check). This is only valid because non-oop pointers must be raw
// pointers, and raw pointers have no anti-dependencies.
// The predicate requires the load's address input to be a raw pointer and the
// load to carry no barrier data.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
            n->in(2)->as_Load()->barrier_data() == 0);

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16643 
// Pointer null check on a register, done as testq of the register with itself.
// The result is a signed flags register (rFlagsReg); that should be fine
// because a compare against zero is only ever consumed as eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) %{
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  ins_encode %{
    const Register ptr = $src$$Register;
    __ testq(ptr, ptr);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16656 
// Null check of a pointer loaded directly from memory (CmpP (LoadP op) zero).
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
// Applies when compressed oops are off or the compressed-oops base is non-null,
// and only when the folded load carries no barrier data.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    // NOTE(review): the format string shows a 64-bit all-ones mask; this relies
    // on the 0xFFFFFFFF immediate being treated as a sign-extended imm32 (-1)
    // by testq - confirm against the assembler's testq(Address, imm32).
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16672 
// Null check of a pointer loaded directly from memory, for the configuration
// where compressed oops are enabled with a null base: R12 then holds zero (see
// format string), so the memory word is compared against R12.
// Requires the folded load to carry no barrier data.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{
  match(Set cr (CmpP (LoadP mem) zero));
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpq(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16685 
// Compressed-pointer compare, register vs register; 32-bit cmpl with the
// result in rFlagsRegU.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16694 
// Compressed-pointer compare, register vs narrow oop loaded from memory.
// Only applies when the folded LoadN carries no barrier data.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{
  match(Set cr (CmpN src (LoadN mem)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{ __ cmpl($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16706 
// Compressed-pointer compare, register vs narrow-oop constant; delegated to
// the cmp_narrow_oop assembler helper.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16716 
// Compressed-pointer compare between a narrow-oop constant and a narrow oop
// loaded from memory; delegated to the cmp_narrow_oop assembler helper.
// In the match rule the constant is the first CmpN input and the load the
// second. Only applies when the folded LoadN carries no barrier data.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{
  match(Set cr (CmpN src (LoadN mem)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{ __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16728 
// Compressed-klass-pointer compare, register vs narrow-klass constant;
// delegated to the cmp_narrow_klass assembler helper.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16738 
// Compressed-klass-pointer compare between a narrow-klass constant and a
// narrow klass loaded from memory; delegated to cmp_narrow_klass.
// Disabled when UseCompactObjectHeaders is set (see predicate).
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  match(Set cr (CmpN src (LoadNKlass mem)));
  predicate(!UseCompactObjectHeaders);

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16750 
// Compressed-pointer null check on a register, done as testl of the register
// with itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{
    const Register narrow = $src$$Register;
    __ testl(narrow, narrow);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16758 
// Null check of a compressed oop loaded directly from memory, used when the
// compressed-oops base is non-null and the folded load carries no barrier data.
// A test against an all-ones mask sets ZF exactly when the loaded narrow oop
// is zero, which is what CmpN against immN0 requires and what the format
// string advertises. (A cmpl against 0xFFFFFFFF would instead report equality
// with -1, i.e. the wrong condition.)
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16772 
// Null check of a compressed oop loaded directly from memory, for the
// configuration where the compressed-oops base is null: R12 then holds zero
// (see format string), so the memory word is compared against R12.
// Requires the folded load to carry no barrier data.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  match(Set cr (CmpN (LoadN mem) zero));
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpl(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16785 
16786 // Yanked all unsigned pointer compare operations.
16787 // Pointer compares are done with CmpP which is already unsigned.
16788 
// Signed long compare, register vs register.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16799 
// Signed long compare, register vs 32-bit immediate (immL32).
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16810 
// Signed long compare, register vs long loaded from memory (LoadL folded into
// the cmpq memory operand).
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16821 
// Long compare against constant zero, done as testq of the register with
// itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16832 
// (src & con) compared against zero for longs, folded into a single testq
// with a 32-bit immediate mask (immL32).
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  ins_encode %{ __ testq($src$$Register, $con$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16843 
// (src1 & src2) compared against zero for longs, folded into a single
// register/register testq.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero) %{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq   $src1, $src2\t# long" %}
  ins_encode %{
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ testq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16854 
// (src & long loaded from memory) compared against zero, folded into a testq
// with a memory operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{ __ testq($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16865 
// Same as testL_reg_mem, but the register operand is a pointer reinterpreted
// as a long (CastP2X); the cast needs no instruction of its own.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{ __ testq($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16876 
// Manifest a CmpU result in an integer register.  Very painful.
// This is the test to avoid.
// Sequence: compare, preload dst with -1, keep it if src1 is below src2
// (unsigned); otherwise setcc(notZero) overwrites dst based on the compare's
// ZF. The flags register is clobbered.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setcc   $dst \t# emits setne + movzbl or setzune for APX\n"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    // movl does not touch the flags, so the branch below still sees the compare.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16900 
// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Sequence: compare, preload dst with -1, keep it if src1 is less than src2
// (signed); otherwise setcc(notZero) overwrites dst based on the compare's ZF.
// The flags register is clobbered.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setcc   $dst \t# emits setne + movzbl or setzune for APX\n"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // movl does not touch the flags, so the branch below still sees the compare.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16924 
// Manifest a CmpUL result in an integer register.  Very painful.
// This is the test to avoid.
// Sequence: compare, preload dst with -1, keep it if src1 is below src2
// (unsigned); otherwise setcc(notZero) overwrites dst based on the compare's
// ZF. The flags register is clobbered.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setcc   $dst \t# emits setne + movzbl or setzune for APX\n"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // movl does not touch the flags, so the branch below still sees the compare.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16948 
16949 // Unsigned long compare Instructions; really, same as signed long except they
16950 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register vs register; result lands in rFlagsRegU.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16961 
// Unsigned long compare, register vs 32-bit immediate (immL32); result lands
// in rFlagsRegU.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16972 
// Unsigned long compare, register vs long loaded from memory (LoadL folded
// into the cmpq memory operand); result lands in rFlagsRegU.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) %{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16983 
// Unsigned long compare against constant zero, done as testq of the register
// with itself.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16994 
// Compare a signed byte in memory against an 8-bit immediate: the LoadB and
// the CmpI collapse into a single cmpb on the memory operand.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) %{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb    $mem, $imm" %}
  ins_encode %{
    __ cmpb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
17004 
// Bit test of an unsigned byte in memory against an immU7 mask: LoadUB, AndI
// and the compare with zero collapse into a single testb.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# ubyte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
17014 
// Bit test of a signed byte in memory against an immI8 mask: LoadB, AndI and
// the compare with zero collapse into a single testb.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# byte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
17024 
17025 //----------Max and Min--------------------------------------------------------
17026 // Min Instructions
17027 
// Conditional move on "greater" (non-APX form): dst is replaced by src when
// the incoming flags say greater. Has no match rule of its own; it is
// instantiated from the minI_rReg expand rule.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register other  = $src$$Register;
    __ cmovl(Assembler::greater, target, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
17039 
// Conditional move on "greater", APX three-operand (NDD) form via ecmovl with
// a separate destination register. Has no match rule of its own; it is
// instantiated from the minI_rReg_ndd expand rule.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register first  = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovl(Assembler::greater, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
17051 
// Integer minimum, two-operand (non-APX) form: dst = MinI(dst, src).
// Expands into a signed compare of dst with src followed by a conditional
// move on "greater" (cmovI_reg_g), using a fresh flags temporary.
instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
17064 
// Integer minimum, APX three-operand (NDD) form: dst = MinI(src1, src2).
// Expands into a signed compare of src1 with src2 followed by the NDD
// conditional move on "greater" (cmovI_reg_g_ndd), using a fresh flags
// temporary. Tagged with PD::Flag_ndd_demotable_opr1.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}
17079 
// Conditional move on "less" (non-APX form): dst is replaced by src when the
// incoming flags say less. Has no match rule of its own; it is instantiated
// from the maxI_rReg expand rule.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register other  = $src$$Register;
    __ cmovl(Assembler::less, target, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
17091 
// Conditional move on "less", APX three-operand (NDD) form via ecmovl with a
// separate destination register. Has no match rule of its own; it is
// instantiated from the maxI_rReg_ndd expand rule.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register first  = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovl(Assembler::less, target, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
17103 
17104 instruct maxI_rReg(rRegI dst, rRegI src) // MaxI without APX: dst = MaxI(dst, src), implemented as compare + conditional move
17105 %{ // expand: compI_rReg sets cr from (dst, src); cmovI_reg_l then conditionally moves src into dst on "less"
17106   predicate(!UseAPX);
17107   match(Set dst (MaxI dst src));
17108 
17109   ins_cost(200);
17110   expand %{
17111     rFlagsReg cr;
17112     compI_rReg(cr, dst, src);
17113     cmovI_reg_l(dst, src, cr);
17114   %}
17115 %}
17116 
17117 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2) // MaxI with APX: dst = MaxI(src1, src2) with a separate destination register
17118 %{ // expand: compI_rReg sets cr from (src1, src2); cmovI_reg_l_ndd then produces dst from src1/src2 on "less"
17119   predicate(UseAPX);
17120   match(Set dst (MaxI src1 src2));
17121   effect(DEF dst, USE src1, USE src2);
17122   flag(PD::Flag_ndd_demotable_opr1); // NOTE(review): presumably lets the NDD form be demoted when dst coincides with the first source - confirm against the flag's definition
17123 
17124   ins_cost(200);
17125   expand %{
17126     rFlagsReg cr;
17127     compI_rReg(cr, src1, src2);
17128     cmovI_reg_l_ndd(dst, src1, src2, cr);
17129   %}
17130 %}
17131 
17132 // ============================================================================
17133 // Branch Instructions
17134 
17135 // Jump Direct (long form) - Label defines a relative address from JMP+1
17136 instruct jmpDir(label labl)
17137 %{
17138   match(Goto);
17139   effect(USE labl);
17140 
17141   ins_cost(300);
17142   format %{ "jmp     $labl" %}
17143   size(5); // declared size of the long form; jmpDir_short is the 2-byte replacement
17144   ins_encode %{
17145     Label* L = $labl$$label;
17146     __ jmp(*L, false); // Always long jump
17147   %}
17148   ins_pipe(pipe_jmp);
17149 %}
17150 
17151 // Jump Direct Conditional (long form) - Label defines a relative address from Jcc+1
17152 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17153 %{
17154   match(If cop cr);
17155   effect(USE labl);
17156 
17157   ins_cost(300);
17158   format %{ "j$cop     $labl" %}
17159   size(6); // declared size of the long form; jmpCon_short is the 2-byte replacement
17160   ins_encode %{
17161     Label* L = $labl$$label;
17162     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17163   %}
17164   ins_pipe(pipe_jcc);
17165 %}
17166 
17167 // Jump Direct Conditional ending a counted loop (matches CountedLoopEnd, long form)
17168 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17169 %{
17170   match(CountedLoopEnd cop cr);
17171   effect(USE labl);
17172 
17173   ins_cost(300);
17174   format %{ "j$cop     $labl\t# loop end" %}
17175   size(6); // declared size of the long form; jmpLoopEnd_short is the 2-byte replacement
17176   ins_encode %{
17177     Label* L = $labl$$label;
17178     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17179   %}
17180   ins_pipe(pipe_jcc);
17181 %}
17182 
17183 // Jump Direct Conditional - using unsigned comparison (cmpOpU on rFlagsRegU, long form)
17184 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17185   match(If cop cmp);
17186   effect(USE labl);
17187 
17188   ins_cost(300);
17189   format %{ "j$cop,u   $labl" %}
17190   size(6); // declared size of the long form; jmpConU_short is the 2-byte replacement
17191   ins_encode %{
17192     Label* L = $labl$$label;
17193     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17194   %}
17195   ins_pipe(pipe_jcc);
17196 %}
17197 
17198 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ // Conditional jump for a cmpOpUCF condition on rFlagsRegUCF flags; a single long jcc
17199   match(If cop cmp);
17200   effect(USE labl);
17201 
17202   ins_cost(200); // cheaper than the cost 300 used by jmpConU
17203   format %{ "j$cop,u   $labl" %}
17204   size(6); // declared size of the long form; jmpConUCF_short is the 2-byte replacement
17205   ins_encode %{
17206     Label* L = $labl$$label;
17207     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17208   %}
17209   ins_pipe(pipe_jcc);
17210 %}
17211 
17212 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{ // Conditional jump for cmpOpUCF2 (only equal / notEqual are handled); emits two branches that also test the parity flag
17213   match(If cop cmp);
17214   effect(USE labl);
17215 
17216   ins_cost(200); // no size() is declared: the notEqual and equal encodings below differ in length
17217   format %{ $$template
17218     if ($cop$$cmpcode == Assembler::notEqual) {
17219       $$emit$$"jp,u    $labl\n\t"
17220       $$emit$$"j$cop,u   $labl"
17221     } else {
17222       $$emit$$"jp,u    done\n\t"
17223       $$emit$$"j$cop,u   $labl\n\t"
17224       $$emit$$"done:"
17225     }
17226   %}
17227   ins_encode %{
17228     Label* l = $labl$$label;
17229     if ($cop$$cmpcode == Assembler::notEqual) {
17230       // notEqual: branch to the target on parity OR on notEqual.
17231       __ jcc(Assembler::parity, *l, false);
17232       __ jcc(Assembler::notEqual, *l, false);
17233     } else if ($cop$$cmpcode == Assembler::equal) {
17234       Label done;
17235       __ jccb(Assembler::parity, done); // equal: parity set skips the branch entirely
17236       __ jcc(Assembler::equal, *l, false);
17237       __ bind(done);
17238     } else {
17239        ShouldNotReachHere(); // any other condition code is treated as a matcher error
17240     }
17241   %}
17242   ins_pipe(pipe_jcc);
17243 
17244 // Jump Direct Conditional - using signed and unsigned comparison (cmpOpUCFE on rFlagsRegUCFE, long form)
17245 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17246   match(If cop cmp);
17247   effect(USE labl);
17248 
17249   ins_cost(200);
17250   format %{ "j$cop,su   $labl" %}
17251   size(6); // declared size of the long form; jmpConUCFE_short is the 2-byte replacement
17252   ins_encode %{
17253     Label* L = $labl$$label;
17254     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17255   %}
17256   ins_pipe(pipe_jcc);
17257 %}
17258 
17259 // ============================================================================
17260 // The 2nd slow-half of a subtype check.  Scan the subklass's secondary
17261 // superklass array for an instance of the superklass.  Set a hidden
17262 // internal cache on a hit (cache is checked with exposed code in
17263 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
17264 // encoding ALSO sets flags.
17265 
17266 instruct partialSubtypeCheck(rdi_RegP result,
17267                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17268                              rFlagsReg cr)
17269 %{
17270   match(Set result (PartialSubtypeCheck sub super));
17271   predicate(!UseSecondarySupersTable); // linear-scan version; the table-based variants below require UseSecondarySupersTable
17272   effect(KILL rcx, KILL cr); // rcx and the flags are clobbered by the scan
17273 
17274   ins_cost(1100);  // slightly larger than the next version
17275   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17276             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17277             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17278             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17279             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
17280             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17281             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
17282     "miss:\t" %} // NOTE(review): the "Hit: rdi zero" annotation above lacks the '#' marker used by the other format lines
17283 
17284   ins_encode %{
17285     Label miss;
17286     // NB: Callers may assume that, when $result is a valid register,
17287     // check_klass_subtype_slow_path_linear sets it to a nonzero
17288     // value.
17289     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17290                                             $rcx$$Register, $result$$Register,
17291                                             nullptr, &miss,
17292                                             /*set_cond_codes:*/ true);
17293     __ xorptr($result$$Register, $result$$Register); // on a hit (no branch taken to miss) the result register is zeroed
17294     __ bind(miss);
17295   %}
17296 
17297   ins_pipe(pipe_slow);
17298 %}
17299 
17300 // ============================================================================
17301 // Two versions of hashtable-based partialSubtypeCheck, both used when
17302 // we need to search for a super class in the secondary supers array.
17303 // The first is used when we don't know _a priori_ the class being
17304 // searched for. The second, far more common, is used when we do know:
17305 // this is used for instanceof, checkcast, and any case where C2 can
17306 // determine it by constant propagation.
17307 
17308 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result, // table-based check for a super class that is not a compile-time constant
17309                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17310                                        rFlagsReg cr)
17311 %{
17312   match(Set result (PartialSubtypeCheck sub super));
17313   predicate(UseSecondarySupersTable);
17314   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4); // four fixed-register temporaries plus the flags are consumed by the lookup
17315 
17316   ins_cost(1000);
17317   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17318 
17319   ins_encode %{
17320     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17321 					 $temp3$$Register, $temp4$$Register, $result$$Register);
17322   %}
17323 
17324   ins_pipe(pipe_slow);
17325 %}
17326 
17327 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result, // table-based check for a super class known as a constant (super_con), also available in super_reg
17328                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17329                                        rFlagsReg cr)
17330 %{
17331   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17332   predicate(UseSecondarySupersTable);
17333   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17334 
17335   ins_cost(700);  // smaller than partialSubtypeCheckVarSuper (1000), so the constant form is preferred when it matches
17336   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17337 
17338   ins_encode %{
17339     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot(); // slot is computed at code-emission time from the constant Klass
17340     if (InlineSecondarySupersTest) {
17341       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17342                                        $temp3$$Register, $temp4$$Register, $result$$Register,
17343                                        super_klass_slot);
17344     } else {
17345       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot))); // out-of-line: call the per-slot stub instead of inlining the lookup
17346     }
17347   %}
17348 
17349   ins_pipe(pipe_slow);
17350 %}
17351 
17352 // ============================================================================
17353 // Branch Instructions -- short offset versions
17354 //
17355 // These instructions are used to replace jumps of a long offset (the default
17356 // match) with jumps of a shorter offset.  These instructions are all tagged
17357 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17358 // match rules in general matching.  Instead, the ADLC generates a conversion
17359 // method in the MachNode which can be used to do in-place replacement of the
17360 // long variant with the shorter variant.  The compiler will determine if a
17361 // branch can be taken by the is_short_branch_offset() predicate in the machine
17362 // specific code section of the file.
17363 
17364 // Jump Direct (short form) - Label defines a relative address from JMP+1
17365 instruct jmpDir_short(label labl) %{
17366   match(Goto);
17367   effect(USE labl);
17368 
17369   ins_cost(300);
17370   format %{ "jmp,s   $labl" %}
17371   size(2); // declared size of the short form (jmpDir declares 5)
17372   ins_encode %{
17373     Label* L = $labl$$label;
17374     __ jmpb(*L);
17375   %}
17376   ins_pipe(pipe_jmp);
17377   ins_short_branch(1); // short-branch replacement: excluded from general matching, substituted for the long variant in place
17378 %}
17379 
17380 // Jump Direct Conditional (short form) - Label defines a relative address from Jcc+1
17381 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17382   match(If cop cr);
17383   effect(USE labl);
17384 
17385   ins_cost(300);
17386   format %{ "j$cop,s   $labl" %}
17387   size(2); // declared size of the short form (jmpCon declares 6)
17388   ins_encode %{
17389     Label* L = $labl$$label;
17390     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17391   %}
17392   ins_pipe(pipe_jcc);
17393   ins_short_branch(1); // short-branch replacement: excluded from general matching, substituted for the long variant in place
17394 %}
17395 
17396 // Jump Direct Conditional ending a counted loop (matches CountedLoopEnd, short form)
17397 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17398   match(CountedLoopEnd cop cr);
17399   effect(USE labl);
17400 
17401   ins_cost(300);
17402   format %{ "j$cop,s   $labl\t# loop end" %}
17403   size(2); // declared size of the short form (jmpLoopEnd declares 6)
17404   ins_encode %{
17405     Label* L = $labl$$label;
17406     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17407   %}
17408   ins_pipe(pipe_jcc);
17409   ins_short_branch(1); // short-branch replacement: excluded from general matching, substituted for the long variant in place
17410 %}
17411 
17412 // Jump Direct Conditional - using unsigned comparison (short form)
17413 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17414   match(If cop cmp);
17415   effect(USE labl);
17416 
17417   ins_cost(300);
17418   format %{ "j$cop,us  $labl" %}
17419   size(2); // declared size of the short form (jmpConU declares 6)
17420   ins_encode %{
17421     Label* L = $labl$$label;
17422     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17423   %}
17424   ins_pipe(pipe_jcc);
17425   ins_short_branch(1); // short-branch replacement: excluded from general matching, substituted for the long variant in place
17426 %}
17427 
17428 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ // Short form of jmpConUCF: a single jccb on the cmpOpUCF condition
17429   match(If cop cmp);
17430   effect(USE labl);
17431 
17432   ins_cost(300); // note: the long variant jmpConUCF uses cost 200
17433   format %{ "j$cop,us  $labl" %}
17434   size(2); // declared size of the short form (jmpConUCF declares 6)
17435   ins_encode %{
17436     Label* L = $labl$$label;
17437     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17438   %}
17439   ins_pipe(pipe_jcc);
17440   ins_short_branch(1); // short-branch replacement: excluded from general matching, substituted for the long variant in place
17441 %}
17442 
17443 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{ // Short form of jmpConUCF2: same parity + equal/notEqual logic, built from two jccb
17444   match(If cop cmp);
17445   effect(USE labl);
17446 
17447   ins_cost(300);
17448   format %{ $$template
17449     if ($cop$$cmpcode == Assembler::notEqual) {
17450       $$emit$$"jp,u,s  $labl\n\t"
17451       $$emit$$"j$cop,u,s  $labl"
17452     } else {
17453       $$emit$$"jp,u,s  done\n\t"
17454       $$emit$$"j$cop,u,s  $labl\n\t"
17455       $$emit$$"done:"
17456     }
17457   %}
17458   size(4); // both encodings below consist of exactly two short branches
17459   ins_encode %{
17460     Label* l = $labl$$label;
17461     if ($cop$$cmpcode == Assembler::notEqual) {
17462       __ jccb(Assembler::parity, *l); // notEqual: branch on parity OR on notEqual
17463       __ jccb(Assembler::notEqual, *l);
17464     } else if ($cop$$cmpcode == Assembler::equal) {
17465       Label done;
17466       __ jccb(Assembler::parity, done); // equal: parity set skips the branch entirely
17467       __ jccb(Assembler::equal, *l);
17468       __ bind(done);
17469     } else {
17470        ShouldNotReachHere(); // any other condition code is treated as a matcher error
17471     }
17472   %}
17473   ins_pipe(pipe_jcc);
17474   ins_short_branch(1); // short-branch replacement: excluded from general matching, substituted for the long variant in place
17475 %}
17476 
17477 // Jump Direct Conditional - using signed and unsigned comparison (short form)
17478 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17479   match(If cop cmp);
17480   effect(USE labl);
17481 
17482   ins_cost(300); // note: the long variant jmpConUCFE uses cost 200
17483   format %{ "j$cop,sus  $labl" %}
17484   size(2); // declared size of the short form (jmpConUCFE declares 6)
17485   ins_encode %{
17486     Label* L = $labl$$label;
17487     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17488   %}
17489   ins_pipe(pipe_jcc);
17490   ins_short_branch(1); // short-branch replacement: excluded from general matching, substituted for the long variant in place
17491 %}
17492 
17493 // ============================================================================
17494 // inlined locking and unlocking
17495 
17496 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{ // Inlined lock: matches FastLock and delivers its outcome in the flags register cr
17497   match(Set cr (FastLock object box));
17498   effect(TEMP rax_reg, TEMP tmp, USE_KILL box); // box (pinned to rbx) is consumed and clobbered; rax_reg and tmp are scratch
17499   ins_cost(300);
17500   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17501   ins_encode %{
17502     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread); // r15_thread is passed as the current-thread register
17503   %}
17504   ins_pipe(pipe_slow);
17505 %}
17506 
17507 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{ // Inlined unlock: matches FastUnlock and delivers its outcome in the flags register cr
17508   match(Set cr (FastUnlock object rax_reg));
17509   effect(TEMP tmp, USE_KILL rax_reg); // the second FastUnlock input arrives in rax and is clobbered; tmp is scratch
17510   ins_cost(300);
17511   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17512   ins_encode %{
17513     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread); // r15_thread is passed as the current-thread register
17514   %}
17515   ins_pipe(pipe_slow);
17516 %}
17517 
17518 
17519 // ============================================================================
17520 // Safepoint Instructions
17521 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll) // Safepoint poll: a testl of rax against the memory addressed by the poll register
17522 %{
17523   match(SafePoint poll);
17524   effect(KILL cr, USE poll); // the test clobbers the flags; poll is only read
17525 
17526   format %{ "testl   rax, [$poll]\t"
17527             "# Safepoint: poll for GC" %}
17528   ins_cost(125);
17529   ins_encode %{
17530     __ relocate(relocInfo::poll_type); // tag the following instruction with a poll_type relocation
17531     address pre_pc = __ pc(); // remember where the poll instruction starts for the assert below
17532     __ testl(rax, Address($poll$$Register, 0));
17533     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17534   %}
17535   ins_pipe(ialu_reg_mem);
17536 %}
17537 
17538 instruct mask_all_evexL(kReg dst, rRegL src) %{ // MaskAll from a long source into a mask (k) register; no predicate, so it applies for any vector length
17539   match(Set dst (MaskAll src));
17540   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17541   ins_encode %{
17542     int mask_len = Matcher::vector_length(this); // mask length is the vector length of this node
17543     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17544   %}
17545   ins_pipe( pipe_slow );
17546 %}
17547 
17548 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{ // MaskAll from an int source for vector lengths above 32; the int is first widened into tmp
17549   predicate(Matcher::vector_length(n) > 32);
17550   match(Set dst (MaskAll src));
17551   effect(TEMP tmp);
17552   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17553   ins_encode %{
17554     int mask_len = Matcher::vector_length(this);
17555     __ movslq($tmp$$Register, $src$$Register); // widen the 32-bit source into the 64-bit temporary
17556     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17557   %}
17558   ins_pipe( pipe_slow );
17559 %}
17560 
17561 // ============================================================================
17562 // Procedure Call/Return Instructions
17563 // Call Java Static Instruction (matches CallStaticJava)
17564 // Note: If this code changes, the corresponding ret_addr_offset() and
17565 //       compute_padding() functions will have to be adjusted.
17566 instruct CallStaticJavaDirect(method meth) %{
17567   match(CallStaticJava);
17568   effect(USE meth);
17569 
17570   ins_cost(300);
17571   format %{ "call,static " %}
17572   opcode(0xE8); /* E8 cd */
17573   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog); // three encoding classes, applied in this order; they are defined outside this section
17574   ins_pipe(pipe_slow);
17575   ins_alignment(4);
17576 %}
17577 
17578 // Call Java Dynamic Instruction (matches CallDynamicJava)
17579 // Note: If this code changes, the corresponding ret_addr_offset() and
17580 //       compute_padding() functions will have to be adjusted.
17581 instruct CallDynamicJavaDirect(method meth)
17582 %{
17583   match(CallDynamicJava);
17584   effect(USE meth);
17585 
17586   ins_cost(300);
17587   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17588             "call,dynamic " %}
17589   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog); // three encoding classes, applied in this order; they are defined outside this section
17590   ins_pipe(pipe_slow);
17591   ins_alignment(4);
17592 %}
17593 
17594 // Call Runtime Instruction (matches CallRuntime)
17595 instruct CallRuntimeDirect(method meth)
17596 %{
17597   match(CallRuntime);
17598   effect(USE meth);
17599 
17600   ins_cost(300);
17601   format %{ "call,runtime " %}
17602   ins_encode(clear_avx, Java_To_Runtime(meth)); // same encoding classes as CallLeafDirect and CallLeafNoFPDirect
17603   ins_pipe(pipe_slow);
17604 %}
17605 
17606 // Call runtime without safepoint (matches CallLeaf)
17607 instruct CallLeafDirect(method meth)
17608 %{
17609   match(CallLeaf);
17610   effect(USE meth);
17611 
17612   ins_cost(300);
17613   format %{ "call_leaf,runtime " %}
17614   ins_encode(clear_avx, Java_To_Runtime(meth)); // same encoding classes as CallRuntimeDirect
17615   ins_pipe(pipe_slow);
17616 %}
17617 
17618 // Call runtime without safepoint and with vector arguments (matches CallLeafVector)
17619 instruct CallLeafDirectVector(method meth)
17620 %{
17621   match(CallLeafVector);
17622   effect(USE meth);
17623 
17624   ins_cost(300);
17625   format %{ "call_leaf,vector " %}
17626   ins_encode(Java_To_Runtime(meth)); // unlike the other runtime calls here, no clear_avx; NOTE(review): presumably because vector registers carry the arguments - confirm
17627   ins_pipe(pipe_slow);
17628 %}
17629 
17630 // Call runtime without safepoint, indirect form (matches CallLeafNoFP):
17631 // entry point is null, target holds the address to call
17632 instruct CallLeafNoFPInDirect(rRegP target)
17633 %{
17634   predicate(n->as_Call()->entry_point() == nullptr); // complements CallLeafNoFPDirect, which requires a non-null entry point
17635   match(CallLeafNoFP target);
17636 
17637   ins_cost(300);
17638   format %{ "call_leaf_nofp,runtime indirect " %}
17639   ins_encode %{
17640      __ call($target$$Register);
17641   %}
17642 
17643   ins_pipe(pipe_slow);
17644 %}
17645 
17646 // Call runtime without safepoint, direct form (matches CallLeafNoFP with a known entry point)
17647 instruct CallLeafNoFPDirect(method meth)
17648 %{
17649   predicate(n->as_Call()->entry_point() != nullptr); // complements CallLeafNoFPInDirect, which handles the null entry point
17650   match(CallLeafNoFP);
17651   effect(USE meth);
17652 
17653   ins_cost(300);
17654   format %{ "call_leaf_nofp,runtime " %}
17655   ins_encode(clear_avx, Java_To_Runtime(meth));
17656   ins_pipe(pipe_slow);
17657 %}
17658 
17659 // Return Instruction
17660 // Remove the return address & jump to it.
17661 // NOTE(review): this comment used to say a nop is always emitted after the ret
17662 // for safepoint patching, but the encoding below emits only ret(0) - confirm.
17663 instruct Ret()
17664 %{
17665   match(Return);
17666 
17667   format %{ "ret" %}
17668   ins_encode %{
17669     __ ret(0);
17670   %}
17671   ins_pipe(pipe_jmp);
17672 %}
17673 
17674 // Tail Call; Jump from runtime stub to Java code.
17675 // Also known as an 'interprocedural jump'.
17676 // Target of jump will eventually return to caller.
17677 // TailJump below removes the return address.
17678 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17679 // emitted just above the TailCall which has reset rbp to the caller state.
17680 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17681 %{
17682   match(TailCall jump_target method_ptr);
17683 
17684   ins_cost(300);
17685   format %{ "jmp     $jump_target\t# rbx holds method" %}
17686   ins_encode %{
17687     __ jmp($jump_target$$Register); // method_ptr is pinned to rbx by its operand class and is not referenced by the encoding
17688   %}
17689   ins_pipe(pipe_jmp);
17690 %}
17691 
17692 // Tail Jump; remove the return address; jump to target.
17693 // TailCall above leaves the return address around.
17694 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17695 %{
17696   match(TailJump jump_target ex_oop);
17697 
17698   ins_cost(300);
17699   format %{ "popq    rdx\t# pop return address\n\t"
17700             "jmp     $jump_target" %}
17701   ins_encode %{
17702     __ popq(as_Register(RDX_enc)); // the return address is popped into rdx, which is hard-coded here rather than declared as an operand
17703     __ jmp($jump_target$$Register); // ex_oop is pinned to rax by its operand class and is not referenced by the encoding
17704   %}
17705   ins_pipe(pipe_jmp);
17706 %}
17707 
17708 // Forward exception: jump (not call) to the StubRoutines forward-exception entry.
17709 instruct ForwardExceptionjmp()
17710 %{
17711   match(ForwardException);
17712 
17713   format %{ "jmp     forward_exception_stub" %}
17714   ins_encode %{
17715     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg); // noreg: no scratch register is supplied to jump()
17716   %}
17717   ins_pipe(pipe_jmp);
17718 %}
17719 
17720 // Create exception oop: created by stack-crawling runtime code.
17721 // Created exception is now available to this handler, and is setup
17722 // just prior to jumping to this handler.  No code emitted.
17723 instruct CreateException(rax_RegP ex_oop)
17724 %{
17725   match(Set ex_oop (CreateEx));
17726 
17727   size(0);
17728   // the format is informational only: size(0) and the empty ins_encode emit no code
17729   format %{ "# exception oop is in rax; no code emitted" %}
17730   ins_encode();
17731   ins_pipe(empty);
17732 %}
17733 
17734 // Rethrow exception:
17735 // The exception oop will come in the first argument position.
17736 // Then JUMP (not call) to the rethrow stub code.
17737 instruct RethrowException()
17738 %{
17739   match(Rethrow);
17740 
17741   // the format names the stub; the encoding jumps to OptoRuntime::rethrow_stub()
17742   format %{ "jmp     rethrow_stub" %}
17743   ins_encode %{
17744     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg); // noreg: no scratch register is supplied to jump()
17745   %}
17746   ins_pipe(pipe_jmp);
17747 %}
17748 
17749 // ============================================================================
17750 // This name is KNOWN by the ADLC and cannot be changed.
17751 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17752 // for this guy.
17753 instruct tlsLoadP(r15_RegP dst) %{ // ThreadLocal: the result operand is pinned to r15, so nothing has to be emitted
17754   match(Set dst (ThreadLocal));
17755   effect(DEF dst);
17756 
17757   size(0); // zero bytes, consistent with the empty encoding below
17758   format %{ "# TLS is in R15" %}
17759   ins_encode( /*empty encoding*/ );
17760   ins_pipe(ialu_reg_reg);
17761 %}
17762 
17763 instruct addF_reg(regF dst, regF src) %{ // Scalar float add, two-operand form: dst = dst + src; selected only when UseAVX == 0
17764   predicate(UseAVX == 0);
17765   match(Set dst (AddF dst src));
17766 
17767   format %{ "addss   $dst, $src" %}
17768   ins_cost(150);
17769   ins_encode %{
17770     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17771   %}
17772   ins_pipe(pipe_slow);
17773 %}
17774 
17775 instruct addF_mem(regF dst, memory src) %{ // Scalar float add with the LoadF of the right operand folded in: dst = dst + [src]; UseAVX == 0 only
17776   predicate(UseAVX == 0);
17777   match(Set dst (AddF dst (LoadF src)));
17778 
17779   format %{ "addss   $dst, $src" %}
17780   ins_cost(150);
17781   ins_encode %{
17782     __ addss($dst$$XMMRegister, $src$$Address);
17783   %}
17784   ins_pipe(pipe_slow);
17785 %}
17786 
17787 instruct addF_imm(regF dst, immF con) %{ // Scalar float add of a constant: dst = dst + con, with con read from the constant table; UseAVX == 0 only
17788   predicate(UseAVX == 0);
17789   match(Set dst (AddF dst con));
17790   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17791   ins_cost(150);
17792   ins_encode %{
17793     __ addss($dst$$XMMRegister, $constantaddress($con));
17794   %}
17795   ins_pipe(pipe_slow);
17796 %}
17797 
17798 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ // Scalar float add, three-operand form: dst = src1 + src2; selected only when UseAVX > 0
17799   predicate(UseAVX > 0);
17800   match(Set dst (AddF src1 src2));
17801 
17802   format %{ "vaddss  $dst, $src1, $src2" %}
17803   ins_cost(150);
17804   ins_encode %{
17805     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17806   %}
17807   ins_pipe(pipe_slow);
17808 %}
17809 
17810 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ // Three-operand float add with the LoadF of the right operand folded in: dst = src1 + [src2]; UseAVX > 0 only
17811   predicate(UseAVX > 0);
17812   match(Set dst (AddF src1 (LoadF src2)));
17813 
17814   format %{ "vaddss  $dst, $src1, $src2" %}
17815   ins_cost(150);
17816   ins_encode %{
17817     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17818   %}
17819   ins_pipe(pipe_slow);
17820 %}
17821 
17822 instruct addF_reg_imm(regF dst, regF src, immF con) %{ // Three-operand float add of a constant: dst = src + con, with con read from the constant table; UseAVX > 0 only
17823   predicate(UseAVX > 0);
17824   match(Set dst (AddF src con));
17825 
17826   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17827   ins_cost(150);
17828   ins_encode %{
17829     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17830   %}
17831   ins_pipe(pipe_slow);
17832 %}
17833 
17834 instruct addD_reg(regD dst, regD src) %{ // Scalar double add, two-operand form: dst = dst + src; selected only when UseAVX == 0
17835   predicate(UseAVX == 0);
17836   match(Set dst (AddD dst src));
17837 
17838   format %{ "addsd   $dst, $src" %}
17839   ins_cost(150);
17840   ins_encode %{
17841     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17842   %}
17843   ins_pipe(pipe_slow);
17844 %}
17845 
17846 instruct addD_mem(regD dst, memory src) %{ // Scalar double add with the LoadD of the right operand folded in: dst = dst + [src]; UseAVX == 0 only
17847   predicate(UseAVX == 0);
17848   match(Set dst (AddD dst (LoadD src)));
17849 
17850   format %{ "addsd   $dst, $src" %}
17851   ins_cost(150);
17852   ins_encode %{
17853     __ addsd($dst$$XMMRegister, $src$$Address);
17854   %}
17855   ins_pipe(pipe_slow);
17856 %}
17857 
17858 instruct addD_imm(regD dst, immD con) %{ // Scalar double add of a constant: dst = dst + con, with con read from the constant table; UseAVX == 0 only
17859   predicate(UseAVX == 0);
17860   match(Set dst (AddD dst con));
17861   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17862   ins_cost(150);
17863   ins_encode %{
17864     __ addsd($dst$$XMMRegister, $constantaddress($con));
17865   %}
17866   ins_pipe(pipe_slow);
17867 %}
17868 
17869 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ // Scalar double add, three-operand form: dst = src1 + src2; selected only when UseAVX > 0
17870   predicate(UseAVX > 0);
17871   match(Set dst (AddD src1 src2));
17872 
17873   format %{ "vaddsd  $dst, $src1, $src2" %}
17874   ins_cost(150);
17875   ins_encode %{
17876     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17877   %}
17878   ins_pipe(pipe_slow);
17879 %}
17880 
17881 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ // Three-operand double add with the LoadD of the right operand folded in: dst = src1 + [src2]; UseAVX > 0 only
17882   predicate(UseAVX > 0);
17883   match(Set dst (AddD src1 (LoadD src2)));
17884 
17885   format %{ "vaddsd  $dst, $src1, $src2" %}
17886   ins_cost(150);
17887   ins_encode %{
17888     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17889   %}
17890   ins_pipe(pipe_slow);
17891 %}
17892 
17893 instruct addD_reg_imm(regD dst, regD src, immD con) %{ // Three-operand double add of a constant: dst = src + con, with con read from the constant table; UseAVX > 0 only
17894   predicate(UseAVX > 0);
17895   match(Set dst (AddD src con));
17896 
17897   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17898   ins_cost(150);
17899   ins_encode %{
17900     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17901   %}
17902   ins_pipe(pipe_slow);
17903 %}
17904 
17905 instruct subF_reg(regF dst, regF src) %{ // Scalar float subtract, two-operand form: dst = dst - src; selected only when UseAVX == 0
17906   predicate(UseAVX == 0);
17907   match(Set dst (SubF dst src));
17908 
17909   format %{ "subss   $dst, $src" %}
17910   ins_cost(150);
17911   ins_encode %{
17912     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17913   %}
17914   ins_pipe(pipe_slow);
17915 %}
17916 
17917 instruct subF_mem(regF dst, memory src) %{ // Scalar float subtract with the LoadF of the right operand folded in: dst = dst - [src]; UseAVX == 0 only
17918   predicate(UseAVX == 0);
17919   match(Set dst (SubF dst (LoadF src)));
17920 
17921   format %{ "subss   $dst, $src" %}
17922   ins_cost(150);
17923   ins_encode %{
17924     __ subss($dst$$XMMRegister, $src$$Address);
17925   %}
17926   ins_pipe(pipe_slow);
17927 %}
17928 
17929 instruct subF_imm(regF dst, immF con) %{ // Scalar float subtract of a constant: dst = dst - con, with con read from the constant table; UseAVX == 0 only
17930   predicate(UseAVX == 0);
17931   match(Set dst (SubF dst con));
17932   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17933   ins_cost(150);
17934   ins_encode %{
17935     __ subss($dst$$XMMRegister, $constantaddress($con));
17936   %}
17937   ins_pipe(pipe_slow);
17938 %}
17939 
17940 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ // Scalar float subtract, three-operand form: dst = src1 - src2; selected only when UseAVX > 0
17941   predicate(UseAVX > 0);
17942   match(Set dst (SubF src1 src2));
17943 
17944   format %{ "vsubss  $dst, $src1, $src2" %}
17945   ins_cost(150);
17946   ins_encode %{
17947     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17948   %}
17949   ins_pipe(pipe_slow);
17950 %}
17951 
17952 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ // Three-operand float subtract with the LoadF of the right operand folded in: dst = src1 - [src2]; UseAVX > 0 only
17953   predicate(UseAVX > 0);
17954   match(Set dst (SubF src1 (LoadF src2)));
17955 
17956   format %{ "vsubss  $dst, $src1, $src2" %}
17957   ins_cost(150);
17958   ins_encode %{
17959     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17960   %}
17961   ins_pipe(pipe_slow);
17962 %}
17963 
17964 instruct subF_reg_imm(regF dst, regF src, immF con) %{ // Three-operand float subtract of a constant: dst = src - con, with con read from the constant table; UseAVX > 0 only
17965   predicate(UseAVX > 0);
17966   match(Set dst (SubF src con));
17967 
17968   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17969   ins_cost(150);
17970   ins_encode %{
17971     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17972   %}
17973   ins_pipe(pipe_slow);
17974 %}
17975 
17976 instruct subD_reg(regD dst, regD src) %{ // Scalar double subtract, two-operand form: dst = dst - src; selected only when UseAVX == 0
17977   predicate(UseAVX == 0);
17978   match(Set dst (SubD dst src));
17979 
17980   format %{ "subsd   $dst, $src" %}
17981   ins_cost(150);
17982   ins_encode %{
17983     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17984   %}
17985   ins_pipe(pipe_slow);
17986 %}
17987 
17988 instruct subD_mem(regD dst, memory src) %{ // Scalar double subtract with the LoadD of the right operand folded in: dst = dst - [src]; UseAVX == 0 only
17989   predicate(UseAVX == 0);
17990   match(Set dst (SubD dst (LoadD src)));
17991 
17992   format %{ "subsd   $dst, $src" %}
17993   ins_cost(150);
17994   ins_encode %{
17995     __ subsd($dst$$XMMRegister, $src$$Address);
17996   %}
17997   ins_pipe(pipe_slow);
17998 %}
17999 
18000 instruct subD_imm(regD dst, immD con) %{ // Scalar double subtract of a constant: dst = dst - con, with con read from the constant table; UseAVX == 0 only
18001   predicate(UseAVX == 0);
18002   match(Set dst (SubD dst con));
18003   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18004   ins_cost(150);
18005   ins_encode %{
18006     __ subsd($dst$$XMMRegister, $constantaddress($con));
18007   %}
18008   ins_pipe(pipe_slow);
18009 %}
18010 
18011 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ // Scalar double subtract, three-operand form: dst = src1 - src2; selected only when UseAVX > 0
18012   predicate(UseAVX > 0);
18013   match(Set dst (SubD src1 src2));
18014 
18015   format %{ "vsubsd  $dst, $src1, $src2" %}
18016   ins_cost(150);
18017   ins_encode %{
18018     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18019   %}
18020   ins_pipe(pipe_slow);
18021 %}
18022 
18023 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ // Three-operand double subtract with the LoadD of the right operand folded in: dst = src1 - [src2]; UseAVX > 0 only
18024   predicate(UseAVX > 0);
18025   match(Set dst (SubD src1 (LoadD src2)));
18026 
18027   format %{ "vsubsd  $dst, $src1, $src2" %}
18028   ins_cost(150);
18029   ins_encode %{
18030     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18031   %}
18032   ins_pipe(pipe_slow);
18033 %}
18034 
18035 instruct subD_reg_imm(regD dst, regD src, immD con) %{ // Three-operand double subtract of a constant: dst = src - con, with con read from the constant table; UseAVX > 0 only
18036   predicate(UseAVX > 0);
18037   match(Set dst (SubD src con));
18038 
18039   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18040   ins_cost(150);
18041   ins_encode %{
18042     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18043   %}
18044   ins_pipe(pipe_slow);
18045 %}
18046 
// Scalar float multiply, non-AVX form (UseAVX == 0): dst = dst * src.
// dst is both the first MulF input and the result (two-operand mulss).
18047 instruct mulF_reg(regF dst, regF src) %{
18048   predicate(UseAVX == 0);
18049   match(Set dst (MulF dst src));
18050 
18051   format %{ "mulss   $dst, $src" %}
18052   ins_cost(150);
18053   ins_encode %{
18054     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
18055   %}
18056   ins_pipe(pipe_slow);
18057 %}
18058 
// Scalar float multiply, non-AVX form (UseAVX == 0): dst = dst * [src].
// The LoadF feeding the second MulF input is folded into mulss's memory operand.
18059 instruct mulF_mem(regF dst, memory src) %{
18060   predicate(UseAVX == 0);
18061   match(Set dst (MulF dst (LoadF src)));
18062 
18063   format %{ "mulss   $dst, $src" %}
18064   ins_cost(150);
18065   ins_encode %{
18066     __ mulss($dst$$XMMRegister, $src$$Address);
18067   %}
18068   ins_pipe(pipe_slow);
18069 %}
18070 
// Scalar float multiply by an immediate, non-AVX form (UseAVX == 0):
// dst = dst * con, with con read from the constant table via $constantaddress.
18071 instruct mulF_imm(regF dst, immF con) %{
18072   predicate(UseAVX == 0);
18073   match(Set dst (MulF dst con));
18074   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18075   ins_cost(150);
18076   ins_encode %{
18077     __ mulss($dst$$XMMRegister, $constantaddress($con));
18078   %}
18079   ins_pipe(pipe_slow);
18080 %}
18081 
// Scalar float multiply, AVX form (UseAVX > 0): dst = src1 * src2.
// Three-operand vmulss; dst is a separate operand from both inputs.
18082 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
18083   predicate(UseAVX > 0);
18084   match(Set dst (MulF src1 src2));
18085 
18086   format %{ "vmulss  $dst, $src1, $src2" %}
18087   ins_cost(150);
18088   ins_encode %{
18089     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18090   %}
18091   ins_pipe(pipe_slow);
18092 %}
18093 
// Scalar float multiply, AVX form (UseAVX > 0): dst = src1 * [src2].
// The LoadF feeding the second MulF input is folded into vmulss's memory operand.
18094 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
18095   predicate(UseAVX > 0);
18096   match(Set dst (MulF src1 (LoadF src2)));
18097 
18098   format %{ "vmulss  $dst, $src1, $src2" %}
18099   ins_cost(150);
18100   ins_encode %{
18101     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18102   %}
18103   ins_pipe(pipe_slow);
18104 %}
18105 
// Scalar float multiply by an immediate, AVX form (UseAVX > 0):
// dst = src * con, with con read from the constant table via $constantaddress.
18106 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
18107   predicate(UseAVX > 0);
18108   match(Set dst (MulF src con));
18109 
18110   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18111   ins_cost(150);
18112   ins_encode %{
18113     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18114   %}
18115   ins_pipe(pipe_slow);
18116 %}
18117 
// Scalar double multiply, non-AVX form (UseAVX == 0): dst = dst * src.
// dst is both the first MulD input and the result (two-operand mulsd).
18118 instruct mulD_reg(regD dst, regD src) %{
18119   predicate(UseAVX == 0);
18120   match(Set dst (MulD dst src));
18121 
18122   format %{ "mulsd   $dst, $src" %}
18123   ins_cost(150);
18124   ins_encode %{
18125     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
18126   %}
18127   ins_pipe(pipe_slow);
18128 %}
18129 
// Scalar double multiply, non-AVX form (UseAVX == 0): dst = dst * [src].
// The LoadD feeding the second MulD input is folded into mulsd's memory operand.
18130 instruct mulD_mem(regD dst, memory src) %{
18131   predicate(UseAVX == 0);
18132   match(Set dst (MulD dst (LoadD src)));
18133 
18134   format %{ "mulsd   $dst, $src" %}
18135   ins_cost(150);
18136   ins_encode %{
18137     __ mulsd($dst$$XMMRegister, $src$$Address);
18138   %}
18139   ins_pipe(pipe_slow);
18140 %}
18141 
// Scalar double multiply by an immediate, non-AVX form (UseAVX == 0):
// dst = dst * con, with con read from the constant table via $constantaddress.
18142 instruct mulD_imm(regD dst, immD con) %{
18143   predicate(UseAVX == 0);
18144   match(Set dst (MulD dst con));
18145   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18146   ins_cost(150);
18147   ins_encode %{
18148     __ mulsd($dst$$XMMRegister, $constantaddress($con));
18149   %}
18150   ins_pipe(pipe_slow);
18151 %}
18152 
// Scalar double multiply, AVX form (UseAVX > 0): dst = src1 * src2.
// Three-operand vmulsd; dst is a separate operand from both inputs.
18153 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
18154   predicate(UseAVX > 0);
18155   match(Set dst (MulD src1 src2));
18156 
18157   format %{ "vmulsd  $dst, $src1, $src2" %}
18158   ins_cost(150);
18159   ins_encode %{
18160     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18161   %}
18162   ins_pipe(pipe_slow);
18163 %}
18164 
// Scalar double multiply, AVX form (UseAVX > 0): dst = src1 * [src2].
// The LoadD feeding the second MulD input is folded into vmulsd's memory operand.
18165 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18166   predicate(UseAVX > 0);
18167   match(Set dst (MulD src1 (LoadD src2)));
18168 
18169   format %{ "vmulsd  $dst, $src1, $src2" %}
18170   ins_cost(150);
18171   ins_encode %{
18172     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18173   %}
18174   ins_pipe(pipe_slow);
18175 %}
18176 
// Scalar double multiply by an immediate, AVX form (UseAVX > 0):
// dst = src * con, with con read from the constant table via $constantaddress.
18177 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18178   predicate(UseAVX > 0);
18179   match(Set dst (MulD src con));
18180 
18181   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18182   ins_cost(150);
18183   ins_encode %{
18184     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18185   %}
18186   ins_pipe(pipe_slow);
18187 %}
18188 
// Scalar float divide, non-AVX form (UseAVX == 0): dst = dst / src.
// dst is the dividend and receives the quotient (two-operand divss).
18189 instruct divF_reg(regF dst, regF src) %{
18190   predicate(UseAVX == 0);
18191   match(Set dst (DivF dst src));
18192 
18193   format %{ "divss   $dst, $src" %}
18194   ins_cost(150);
18195   ins_encode %{
18196     __ divss($dst$$XMMRegister, $src$$XMMRegister);
18197   %}
18198   ins_pipe(pipe_slow);
18199 %}
18200 
// Scalar float divide, non-AVX form (UseAVX == 0): dst = dst / [src].
// The LoadF supplying the divisor is folded into divss's memory operand.
18201 instruct divF_mem(regF dst, memory src) %{
18202   predicate(UseAVX == 0);
18203   match(Set dst (DivF dst (LoadF src)));
18204 
18205   format %{ "divss   $dst, $src" %}
18206   ins_cost(150);
18207   ins_encode %{
18208     __ divss($dst$$XMMRegister, $src$$Address);
18209   %}
18210   ins_pipe(pipe_slow);
18211 %}
18212 
// Scalar float divide by an immediate, non-AVX form (UseAVX == 0):
// dst = dst / con, with con read from the constant table via $constantaddress.
18213 instruct divF_imm(regF dst, immF con) %{
18214   predicate(UseAVX == 0);
18215   match(Set dst (DivF dst con));
18216   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18217   ins_cost(150);
18218   ins_encode %{
18219     __ divss($dst$$XMMRegister, $constantaddress($con));
18220   %}
18221   ins_pipe(pipe_slow);
18222 %}
18223 
// Scalar float divide, AVX form (UseAVX > 0): dst = src1 / src2.
// Three-operand vdivss; dst is a separate operand from both inputs.
18224 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18225   predicate(UseAVX > 0);
18226   match(Set dst (DivF src1 src2));
18227 
18228   format %{ "vdivss  $dst, $src1, $src2" %}
18229   ins_cost(150);
18230   ins_encode %{
18231     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18232   %}
18233   ins_pipe(pipe_slow);
18234 %}
18235 
// Scalar float divide, AVX form (UseAVX > 0): dst = src1 / [src2].
// The LoadF supplying the divisor is folded into vdivss's memory operand.
18236 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18237   predicate(UseAVX > 0);
18238   match(Set dst (DivF src1 (LoadF src2)));
18239 
18240   format %{ "vdivss  $dst, $src1, $src2" %}
18241   ins_cost(150);
18242   ins_encode %{
18243     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18244   %}
18245   ins_pipe(pipe_slow);
18246 %}
18247 
// Scalar float divide by an immediate, AVX form (UseAVX > 0):
// dst = src / con, with con read from the constant table via $constantaddress.
18248 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18249   predicate(UseAVX > 0);
18250   match(Set dst (DivF src con));
18251 
18252   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18253   ins_cost(150);
18254   ins_encode %{
18255     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18256   %}
18257   ins_pipe(pipe_slow);
18258 %}
18259 
// Scalar double divide, non-AVX form (UseAVX == 0): dst = dst / src.
// dst is the dividend and receives the quotient (two-operand divsd).
18260 instruct divD_reg(regD dst, regD src) %{
18261   predicate(UseAVX == 0);
18262   match(Set dst (DivD dst src));
18263 
18264   format %{ "divsd   $dst, $src" %}
18265   ins_cost(150);
18266   ins_encode %{
18267     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18268   %}
18269   ins_pipe(pipe_slow);
18270 %}
18271 
// Scalar double divide, non-AVX form (UseAVX == 0): dst = dst / [src].
// The LoadD supplying the divisor is folded into divsd's memory operand.
18272 instruct divD_mem(regD dst, memory src) %{
18273   predicate(UseAVX == 0);
18274   match(Set dst (DivD dst (LoadD src)));
18275 
18276   format %{ "divsd   $dst, $src" %}
18277   ins_cost(150);
18278   ins_encode %{
18279     __ divsd($dst$$XMMRegister, $src$$Address);
18280   %}
18281   ins_pipe(pipe_slow);
18282 %}
18283 
// Scalar double divide by an immediate, non-AVX form (UseAVX == 0):
// dst = dst / con, with con read from the constant table via $constantaddress.
18284 instruct divD_imm(regD dst, immD con) %{
18285   predicate(UseAVX == 0);
18286   match(Set dst (DivD dst con));
18287   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18288   ins_cost(150);
18289   ins_encode %{
18290     __ divsd($dst$$XMMRegister, $constantaddress($con));
18291   %}
18292   ins_pipe(pipe_slow);
18293 %}
18294 
// Scalar double divide, AVX form (UseAVX > 0): dst = src1 / src2.
// Three-operand vdivsd; dst is a separate operand from both inputs.
18295 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18296   predicate(UseAVX > 0);
18297   match(Set dst (DivD src1 src2));
18298 
18299   format %{ "vdivsd  $dst, $src1, $src2" %}
18300   ins_cost(150);
18301   ins_encode %{
18302     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18303   %}
18304   ins_pipe(pipe_slow);
18305 %}
18306 
// Scalar double divide, AVX form (UseAVX > 0): dst = src1 / [src2].
// The LoadD supplying the divisor is folded into vdivsd's memory operand.
18307 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18308   predicate(UseAVX > 0);
18309   match(Set dst (DivD src1 (LoadD src2)));
18310 
18311   format %{ "vdivsd  $dst, $src1, $src2" %}
18312   ins_cost(150);
18313   ins_encode %{
18314     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18315   %}
18316   ins_pipe(pipe_slow);
18317 %}
18318 
// Scalar double divide by an immediate, AVX form (UseAVX > 0):
// dst = src / con, with con read from the constant table via $constantaddress.
18319 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18320   predicate(UseAVX > 0);
18321   match(Set dst (DivD src con));
18322 
18323   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18324   ins_cost(150);
18325   ins_encode %{
18326     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18327   %}
18328   ins_pipe(pipe_slow);
18329 %}
18330 
// Float absolute value in place, non-AVX form (UseAVX == 0).
// ANDs dst with the constant at float_signmask() (shown as 0x7fffffff in the
// format string), which clears the sign bit.
18331 instruct absF_reg(regF dst) %{
18332   predicate(UseAVX == 0);
18333   match(Set dst (AbsF dst));
18334   ins_cost(150);
18335   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18336   ins_encode %{
18337     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18338   %}
18339   ins_pipe(pipe_slow);
18340 %}
18341 
// Float absolute value, AVX form (UseAVX > 0): dst = src & signmask.
// Same sign-masking approach as the non-AVX rule, but with a separate
// destination; the vandps is emitted with a 128-bit vector length.
18342 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18343   predicate(UseAVX > 0);
18344   match(Set dst (AbsF src));
18345   ins_cost(150);
18346   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18347   ins_encode %{
18348     int vlen_enc = Assembler::AVX_128bit;
18349     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18350               ExternalAddress(float_signmask()), vlen_enc);
18351   %}
18352   ins_pipe(pipe_slow);
18353 %}
18354 
// Double absolute value in place, non-AVX form (UseAVX == 0).
// ANDs dst with the constant at double_signmask() (shown as
// 0x7fffffffffffffff in the format string), which clears the sign bit.
18355 instruct absD_reg(regD dst) %{
18356   predicate(UseAVX == 0);
18357   match(Set dst (AbsD dst));
18358   ins_cost(150);
18359   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18360             "# abs double by sign masking" %}
18361   ins_encode %{
18362     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18363   %}
18364   ins_pipe(pipe_slow);
18365 %}
18366 
// Double absolute value, AVX form (UseAVX > 0): dst = src & signmask.
// Same sign-masking approach as the non-AVX rule, but with a separate
// destination; the vandpd is emitted with a 128-bit vector length.
18367 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18368   predicate(UseAVX > 0);
18369   match(Set dst (AbsD src));
18370   ins_cost(150);
18371   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18372             "# abs double by sign masking" %}
18373   ins_encode %{
18374     int vlen_enc = Assembler::AVX_128bit;
18375     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18376               ExternalAddress(double_signmask()), vlen_enc);
18377   %}
18378   ins_pipe(pipe_slow);
18379 %}
18380 
// Float negation in place, non-AVX form (UseAVX == 0).
// XORs dst with the constant at float_signflip() (shown as 0x80000000 in the
// format string), which toggles the sign bit.
18381 instruct negF_reg(regF dst) %{
18382   predicate(UseAVX == 0);
18383   match(Set dst (NegF dst));
18384   ins_cost(150);
18385   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18386   ins_encode %{
18387     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18388   %}
18389   ins_pipe(pipe_slow);
18390 %}
18391 
// Float negation, AVX form (UseAVX > 0): dst = -src.
// Delegates to the vnegatess macro-assembler helper, passing the
// float_signflip() constant used for the sign flip.
18392 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18393   predicate(UseAVX > 0);
18394   match(Set dst (NegF src));
18395   ins_cost(150);
18396   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18397   ins_encode %{
18398     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18399                  ExternalAddress(float_signflip()));
18400   %}
18401   ins_pipe(pipe_slow);
18402 %}
18403 
// Double negation in place, non-AVX form (UseAVX == 0).
// XORs dst with the constant at double_signflip() (shown as
// 0x8000000000000000 in the format string), which toggles the sign bit.
18404 instruct negD_reg(regD dst) %{
18405   predicate(UseAVX == 0);
18406   match(Set dst (NegD dst));
18407   ins_cost(150);
18408   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18409             "# neg double by sign flipping" %}
18410   ins_encode %{
18411     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18412   %}
18413   ins_pipe(pipe_slow);
18414 %}
18415 
// Double negation, AVX form (UseAVX > 0): dst = -src.
// Delegates to the vnegatesd macro-assembler helper, passing the
// double_signflip() constant used for the sign flip.
18416 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18417   predicate(UseAVX > 0);
18418   match(Set dst (NegD src));
18419   ins_cost(150);
18420   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18421             "# neg double by sign flipping" %}
18422   ins_encode %{
18423     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18424                  ExternalAddress(double_signflip()));
18425   %}
18426   ins_pipe(pipe_slow);
18427 %}
18428 
18429 // sqrtss instruction needs destination register to be pre initialized for best performance
18430 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
// Float square root in place: dst = sqrt(dst). The rule has no predicate, so
// it applies regardless of UseAVX, and dst is passed as both sqrtss operands.
18431 instruct sqrtF_reg(regF dst) %{
18432   match(Set dst (SqrtF dst));
18433   format %{ "sqrtss  $dst, $dst" %}
18434   ins_encode %{
18435     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18436   %}
18437   ins_pipe(pipe_slow);
18438 %}
18439 
18440 // sqrtsd instruction needs destination register to be pre initialized for best performance
18441 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
// Double square root in place: dst = sqrt(dst). The rule has no predicate, so
// it applies regardless of UseAVX, and dst is passed as both sqrtsd operands.
18442 instruct sqrtD_reg(regD dst) %{
18443   match(Set dst (SqrtD dst));
18444   format %{ "sqrtsd  $dst, $dst" %}
18445   ins_encode %{
18446     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18447   %}
18448   ins_pipe(pipe_slow);
18449 %}
18450 
// Scalar float -> half-float conversion (ConvF2HF) producing the result in a
// general-purpose register. The work is done by the flt_to_flt16 helper,
// which needs an XMM scratch register ($tmp, declared TEMP).
18451 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18452   effect(TEMP tmp);
18453   match(Set dst (ConvF2HF src));
18454   ins_cost(125);
18455   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18456   ins_encode %{
18457     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18458   %}
18459   ins_pipe( pipe_slow );
18460 %}
18461 
// Fused float -> half-float conversion and 16-bit store: matches
// StoreC(mem, ConvF2HF(src)). Only selected when UseAVX > 2 and AVX512VL is
// available. An opmask with just bit 0 set is built in $ktmp (via $rtmp) and
// used to predicate the evcvtps2ph store.
// NOTE(review): presumably the single-bit mask limits the store to one
// half-float element, and imm 0x04 selects the rounding behaviour - confirm
// against the ISA manual.
18462 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18463   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18464   effect(TEMP ktmp, TEMP rtmp);
18465   match(Set mem (StoreC mem (ConvF2HF src)));
18466   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18467   ins_encode %{
18468     __ movl($rtmp$$Register, 0x1);
18469     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18470     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18471   %}
18472   ins_pipe( pipe_slow );
18473 %}
18474 
// Vector float -> half-float cast (VectorCastF2HF), register to register.
// The vector-length encoding is derived from the source operand, not from
// the node itself.
18475 instruct vconvF2HF(vec dst, vec src) %{
18476   match(Set dst (VectorCastF2HF src));
18477   format %{ "vector_conv_F2HF $dst $src" %}
18478   ins_encode %{
18479     int vlen_enc = vector_length_encoding(this, $src);
18480     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18481   %}
18482   ins_pipe( pipe_slow );
18483 %}
18484 
// Fused vector float -> half-float cast and store: matches
// StoreVector(mem, VectorCastF2HF(src)) and emits vcvtps2ph with a memory
// destination. Restricted to stores of at least 16 bytes.
18485 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18486   predicate(n->as_StoreVector()->memory_size() >= 16);
18487   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18488   format %{ "vcvtps2ph $mem,$src" %}
18489   ins_encode %{
18490     int vlen_enc = vector_length_encoding(this, $src);
18491     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18492   %}
18493   ins_pipe( pipe_slow );
18494 %}
18495 
// Scalar half-float -> float conversion (ConvHF2F): the half-float bits come
// from a general-purpose register and the float result lands in an XMM
// register. Implemented by the flt16_to_flt helper.
18496 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18497   match(Set dst (ConvHF2F src));
18498   format %{ "vcvtph2ps $dst,$src" %}
18499   ins_encode %{
18500     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18501   %}
18502   ins_pipe( pipe_slow );
18503 %}
18504 
// Vector half-float -> float cast with the LoadVector folded in:
// vcvtph2ps reads its source directly from memory. The vector-length
// encoding is taken from this node.
18505 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18506   match(Set dst (VectorCastHF2F (LoadVector mem)));
18507   format %{ "vcvtph2ps $dst,$mem" %}
18508   ins_encode %{
18509     int vlen_enc = vector_length_encoding(this);
18510     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18511   %}
18512   ins_pipe( pipe_slow );
18513 %}
18514 
// Vector half-float -> float cast (VectorCastHF2F), register to register.
// The vector-length encoding is taken from this node.
18515 instruct vconvHF2F(vec dst, vec src) %{
18516   match(Set dst (VectorCastHF2F src));
18517   ins_cost(125);
18518   format %{ "vector_conv_HF2F $dst,$src" %}
18519   ins_encode %{
18520     int vlen_enc = vector_length_encoding(this);
18521     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18522   %}
18523   ins_pipe( pipe_slow );
18524 %}
18525 
18526 // ---------------------------------------- VectorReinterpret ------------------------------------
// VectorReinterpret between predicate (opmask) types with the same lane
// count. The match is in place on dst and the encoding is empty, so no
// machine code is emitted.
18527 instruct reinterpret_mask(kReg dst) %{
18528   predicate(n->bottom_type()->isa_pvectmask() &&
18529             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18530   match(Set dst (VectorReinterpret dst));
18531   ins_cost(125);
18532   format %{ "vector_reinterpret $dst\t!" %}
18533   ins_encode %{
18534     // empty
18535   %}
18536   ins_pipe( pipe_slow );
18537 %}
18538 
// VectorReinterpret of an opmask with T_SHORT lanes into an opmask with
// T_BYTE lanes (lane counts differ, total byte size is asserted equal).
// The source mask is first materialized as a vector of word lanes in $xtmp
// (evpmovm2w); a byte-granular mask is then extracted from that vector
// (evpmovb2m).
18539 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18540   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18541             n->bottom_type()->isa_pvectmask() &&
18542             n->in(1)->bottom_type()->isa_pvectmask() &&
18543             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
18544             n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // src: short lanes, dst: byte lanes
18545   match(Set dst (VectorReinterpret src));
18546   effect(TEMP xtmp);
18547   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18548   ins_encode %{
18549      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18550      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18551      assert(src_sz == dst_sz , "src and dst size mismatch");
18552      int vlen_enc = vector_length_encoding(src_sz);
18553      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18554      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18555   %}
18556   ins_pipe( pipe_slow );
18557 %}
18558 
// VectorReinterpret of an opmask with T_INT/T_FLOAT lanes into an opmask
// with T_BYTE lanes (lane counts differ, total byte size is asserted equal).
// The source mask is first materialized as a vector of dword lanes in $xtmp
// (evpmovm2d); a byte-granular mask is then extracted from that vector
// (evpmovb2m).
18559 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18560   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18561             n->bottom_type()->isa_pvectmask() &&
18562             n->in(1)->bottom_type()->isa_pvectmask() &&
18563             (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
18564              n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
18565             n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // src: int/float lanes, dst: byte lanes
18566   match(Set dst (VectorReinterpret src));
18567   effect(TEMP xtmp);
18568   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18569   ins_encode %{
18570      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18571      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18572      assert(src_sz == dst_sz , "src and dst size mismatch");
18573      int vlen_enc = vector_length_encoding(src_sz);
18574      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18575      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18576   %}
18577   ins_pipe( pipe_slow );
18578 %}
18579 
// VectorReinterpret of an opmask with T_LONG/T_DOUBLE lanes into an opmask
// with T_BYTE lanes (lane counts differ, total byte size is asserted equal).
// The source mask is first materialized as a vector of qword lanes in $xtmp
// (evpmovm2q); a byte-granular mask is then extracted from that vector
// (evpmovb2m).
18580 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18581   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18582             n->bottom_type()->isa_pvectmask() &&
18583             n->in(1)->bottom_type()->isa_pvectmask() &&
18584             (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
18585              n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
18586             n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // src: long/double lanes, dst: byte lanes
18587   match(Set dst (VectorReinterpret src));
18588   effect(TEMP xtmp);
18589   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18590   ins_encode %{
18591      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18592      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18593      assert(src_sz == dst_sz , "src and dst size mismatch");
18594      int vlen_enc = vector_length_encoding(src_sz);
18595      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18596      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18597   %}
18598   ins_pipe( pipe_slow );
18599 %}
18600 
// VectorReinterpret between non-mask vector types of identical byte size.
// The match is in place on dst and the encoding is empty, so no machine code
// is emitted.
18601 instruct reinterpret(vec dst) %{
18602   predicate(!n->bottom_type()->isa_pvectmask() &&
18603             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18604   match(Set dst (VectorReinterpret dst));
18605   ins_cost(125);
18606   format %{ "vector_reinterpret $dst\t!" %}
18607   ins_encode %{
18608     // empty
18609   %}
18610   ins_pipe( pipe_slow );
18611 %}
18612 
// Widening VectorReinterpret, non-AVX form (UseAVX == 0): a 4- or 8-byte
// source is reinterpreted as a vector of at most 16 bytes. dst is first
// loaded with a bit mask covering the source width and then ANDed with src,
// which keeps only the source bytes.
// dst is declared TEMP because it is written (mask load) before src is read.
18613 instruct reinterpret_expand(vec dst, vec src) %{
18614   predicate(UseAVX == 0 &&
18615             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18616   match(Set dst (VectorReinterpret src));
18617   ins_cost(125);
18618   effect(TEMP dst);
18619   format %{ "vector_reinterpret_expand $dst,$src" %}
18620   ins_encode %{
18621     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18622     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18623 
18624     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18625     if (src_vlen_in_bytes == 4) {
18626       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18627     } else {
18628       assert(src_vlen_in_bytes == 8, "");
18629       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18630     }
18631     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18632   %}
18633   ins_pipe( pipe_slow );
18634 %}
18635 
// Widening VectorReinterpret, AVX form (UseAVX > 0), for a 4-byte non-mask
// source: dst = src & vector_32_bit_mask, done in a single three-operand vpand.
// NOTE(review): the literal 0 passed as the vector-length argument presumably
// stands for Assembler::AVX_128bit - confirm.
18636 instruct vreinterpret_expand4(legVec dst, vec src) %{
18637   predicate(UseAVX > 0 &&
18638             !n->bottom_type()->isa_pvectmask() &&
18639             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18640             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18641   match(Set dst (VectorReinterpret src));
18642   ins_cost(125);
18643   format %{ "vector_reinterpret_expand $dst,$src" %}
18644   ins_encode %{
18645     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18646   %}
18647   ins_pipe( pipe_slow );
18648 %}
18649 
18650 
// Widening VectorReinterpret, AVX form (UseAVX > 0), for non-mask sources
// larger than 4 bytes. Copies src into dst with a move whose width equals the
// source size (8, 16 or 32 bytes); any other source size is a bug.
// NOTE(review): presumably relies on these moves clearing the destination
// bits above the copied width - confirm.
18651 instruct vreinterpret_expand(legVec dst, vec src) %{
18652   predicate(UseAVX > 0 &&
18653             !n->bottom_type()->isa_pvectmask() &&
18654             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18655             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18656   match(Set dst (VectorReinterpret src));
18657   ins_cost(125);
18658   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18659   ins_encode %{
18660     switch (Matcher::vector_length_in_bytes(this, $src)) {
18661       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18662       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18663       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18664       default: ShouldNotReachHere();
18665     }
18666   %}
18667   ins_pipe( pipe_slow );
18668 %}
18669 
// Narrowing VectorReinterpret for non-mask vectors (source larger than
// destination). Copies src into dst with a move whose width equals the
// destination size (4, 8, 16 or 32 bytes); any other size is a bug.
18670 instruct reinterpret_shrink(vec dst, legVec src) %{
18671   predicate(!n->bottom_type()->isa_pvectmask() &&
18672             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18673   match(Set dst (VectorReinterpret src));
18674   ins_cost(125);
18675   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18676   ins_encode %{
18677     switch (Matcher::vector_length_in_bytes(this)) {
18678       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18679       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18680       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18681       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18682       default: ShouldNotReachHere();
18683     }
18684   %}
18685   ins_pipe( pipe_slow );
18686 %}
18687 
18688 // ----------------------------------------------------------------------------------------------------
18689 
// Scalar double rounding with an explicit rounding mode (RoundDoubleMode):
// dst = roundsd(src, rmode). Asserts UseSSE >= 4.
// When AVX is off and dst differs from src, dst is zeroed with pxor first.
// NOTE(review): presumably this breaks a false dependency on the previous
// contents of dst - confirm.
18690 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18691   match(Set dst (RoundDoubleMode src rmode));
18692   format %{ "roundsd $dst,$src" %}
18693   ins_cost(150);
18694   ins_encode %{
18695     assert(UseSSE >= 4, "required");
18696     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18697       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18698     }
18699     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18700   %}
18701   ins_pipe(pipe_slow);
18702 %}
18703 
// Scalar double rounding of an immediate (RoundDoubleMode con rmode).
// The operand is read from the constant table via $constantaddress; asserts
// UseSSE >= 4.
18704 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18705   match(Set dst (RoundDoubleMode con rmode));
18706   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18707   ins_cost(150);
18708   ins_encode %{
18709     assert(UseSSE >= 4, "required");
18710     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18711   %}
18712   ins_pipe(pipe_slow);
18713 %}
18714 
// Packed double rounding (RoundDoubleModeV) for vectors with fewer than
// 8 lanes, register source. Emits vroundpd; asserts UseAVX > 0.
18715 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18716   predicate(Matcher::vector_length(n) < 8);
18717   match(Set dst (RoundDoubleModeV src rmode));
18718   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18719   ins_encode %{
18720     assert(UseAVX > 0, "required");
18721     int vlen_enc = vector_length_encoding(this);
18722     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18723   %}
18724   ins_pipe( pipe_slow );
18725 %}
18726 
// Packed double rounding (RoundDoubleModeV) for 8-lane vectors, register
// source. Emits vrndscalepd at 512-bit vector length; asserts UseAVX > 2.
18727 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18728   predicate(Matcher::vector_length(n) == 8);
18729   match(Set dst (RoundDoubleModeV src rmode));
18730   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18731   ins_encode %{
18732     assert(UseAVX > 2, "required");
18733     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18734   %}
18735   ins_pipe( pipe_slow );
18736 %}
18737 
// Packed double rounding (RoundDoubleModeV) for vectors with fewer than
// 8 lanes, with the LoadVector folded into vroundpd's memory operand.
// Asserts UseAVX > 0.
18738 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18739   predicate(Matcher::vector_length(n) < 8);
18740   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18741   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18742   ins_encode %{
18743     assert(UseAVX > 0, "required");
18744     int vlen_enc = vector_length_encoding(this);
18745     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18746   %}
18747   ins_pipe( pipe_slow );
18748 %}
18749 
// Packed double rounding (RoundDoubleModeV) for 8-lane vectors, with the
// LoadVector folded into vrndscalepd's memory operand. Emitted at 512-bit
// vector length; asserts UseAVX > 2.
18750 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18751   predicate(Matcher::vector_length(n) == 8);
18752   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18753   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18754   ins_encode %{
18755     assert(UseAVX > 2, "required");
18756     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18757   %}
18758   ins_pipe( pipe_slow );
18759 %}
18760 
// Spin-wait hint: matches the OnSpinWait node (no operands) and emits a
// single pause instruction.
18761 instruct onspinwait() %{
18762   match(OnSpinWait);
18763   ins_cost(200);
18764 
18765   format %{
18766     $$template
18767     $$emit$$"pause\t! membar_onspinwait"
18768   %}
18769   ins_encode %{
18770     __ pause();
18771   %}
18772   ins_pipe(pipe_slow);
18773 %}
18774 
18775 // a * b + c
// Fused multiply-add on doubles: c = a * b + c. Operand c is both the addend
// and the result register (it is passed to fmad as destination and as the
// last source). Asserts that UseFMA is enabled.
18776 instruct fmaD_reg(regD a, regD b, regD c) %{
18777   match(Set c (FmaD  c (Binary a b)));
18778   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18779   ins_cost(150);
18780   ins_encode %{
18781     assert(UseFMA, "Needs FMA instructions support.");
18782     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18783   %}
18784   ins_pipe( pipe_slow );
18785 %}
18786 
18787 // a * b + c
// Fused multiply-add on floats: c = a * b + c. Operand c is both the addend
// and the result register (it is passed to fmaf as destination and as the
// last source). Asserts that UseFMA is enabled.
18788 instruct fmaF_reg(regF a, regF b, regF c) %{
18789   match(Set c (FmaF  c (Binary a b)));
18790   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18791   ins_cost(150);
18792   ins_encode %{
18793     assert(UseFMA, "Needs FMA instructions support.");
18794     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18795   %}
18796   ins_pipe( pipe_slow );
18797 %}
18798 
18799 // ====================VECTOR INSTRUCTIONS=====================================
18800 
18801 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Placeholder move from the vec operand class to the legVec operand class.
// It has an empty format and its encoding is ShouldNotReachHere(), i.e. the
// node is expected to be gone before code emission.
18802 instruct MoveVec2Leg(legVec dst, vec src) %{
18803   match(Set dst src);
18804   format %{ "" %}
18805   ins_encode %{
18806     ShouldNotReachHere();
18807   %}
18808   ins_pipe( fpu_reg_reg );
18809 %}
18810 
// Placeholder move from the legVec operand class to the vec operand class.
// It has an empty format and its encoding is ShouldNotReachHere(), i.e. the
// node is expected to be gone before code emission.
18811 instruct MoveLeg2Vec(vec dst, legVec src) %{
18812   match(Set dst src);
18813   format %{ "" %}
18814   ins_encode %{
18815     ShouldNotReachHere();
18816   %}
18817   ins_pipe( fpu_reg_reg );
18818 %}
18819 
18820 // ============================================================================
18821 
18822 // Load vectors generic operand pattern
// Matches any LoadVector. The element type and the vector size in bytes are
// read from the node and handed to the load_vector helper, which chooses the
// actual load instruction.
18823 instruct loadV(vec dst, memory mem) %{
18824   match(Set dst (LoadVector mem));
18825   ins_cost(125);
18826   format %{ "load_vector $dst,$mem" %}
18827   ins_encode %{
18828     BasicType bt = Matcher::vector_element_basic_type(this);
18829     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18830   %}
18831   ins_pipe( pipe_slow );
18832 %}
18833 
18834 // Store vectors generic operand pattern.
// Matches any StoreVector and picks the store instruction from the byte size
// of the source vector: 4 -> movdl, 8 -> movq, 16 -> movdqu, 32 -> vmovdqu,
// 64 -> evmovdqul (512-bit). Any other size is a bug.
// NOTE(review): the trailing "\n\t" in the format string looks like a
// leftover from a multi-line format - confirm before changing.
18835 instruct storeV(memory mem, vec src) %{
18836   match(Set mem (StoreVector mem src));
18837   ins_cost(145);
18838   format %{ "store_vector $mem,$src\n\t" %}
18839   ins_encode %{
18840     switch (Matcher::vector_length_in_bytes(this, $src)) {
18841       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18842       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18843       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18844       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18845       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18846       default: ShouldNotReachHere();
18847     }
18848   %}
18849   ins_pipe( pipe_slow );
18850 %}
18851 
18852 // ---------------------------------------- Gather ------------------------------------
18853 
18854 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18855 
// Unmasked gather (LoadVectorGather) for non-subword element types on
// targets without AVX512VL, for vectors of at most 32 bytes.
// The gather is driven by a vector mask held in $mask: comparing $mask with
// itself for equality sets every bit, so all lanes are enabled. The base
// address is materialized into $tmp with lea before calling vgather.
// dst, tmp and mask are all declared TEMP.
18856 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18857   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18858             Matcher::vector_length_in_bytes(n) <= 32);
18859   match(Set dst (LoadVectorGather mem idx));
18860   effect(TEMP dst, TEMP tmp, TEMP mask);
18861   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18862   ins_encode %{
18863     int vlen_enc = vector_length_encoding(this);
18864     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18865     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18866     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18867     __ lea($tmp$$Register, $mem$$Address);
18868     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18869   %}
18870   ins_pipe( pipe_slow );
18871 %}
18872 
18873 
// Unmasked gather (LoadVectorGather) for non-subword element types, used
// when AVX512VL is available or the vector is 64 bytes wide.
// The gather is predicated by an opmask in $ktmp: XNOR-ing $ktmp with itself
// yields all ones, so every lane is enabled. The base address is materialized
// into $tmp with lea before calling evgather.
// dst, tmp and ktmp are all declared TEMP.
18874 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18875   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18876             !is_subword_type(Matcher::vector_element_basic_type(n)));
18877   match(Set dst (LoadVectorGather mem idx));
18878   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18879   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18880   ins_encode %{
18881     int vlen_enc = vector_length_encoding(this);
18882     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18883     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18884     __ lea($tmp$$Register, $mem$$Address);
18885     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18886   %}
18887   ins_pipe( pipe_slow );
18888 %}
18889 
18890 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18891   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18892             !is_subword_type(Matcher::vector_element_basic_type(n)));
18893   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18894   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18895   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18896   ins_encode %{
18897     assert(UseAVX > 2, "sanity");
18898     int vlen_enc = vector_length_encoding(this);
18899     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18900     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18901     // Note: Since gather instruction partially updates the opmask register used
18902     // for predication hense moving mask operand to a temporary.
18903     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18904     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18905     __ lea($tmp$$Register, $mem$$Address);
18906     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18907   %}
18908   ins_pipe( pipe_slow );
18909 %}
18910 
18911 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18912   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18913   match(Set dst (LoadVectorGather mem idx_base));
18914   effect(TEMP tmp, TEMP rtmp);
18915   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18916   ins_encode %{
18917     int vlen_enc = vector_length_encoding(this);
18918     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18919     __ lea($tmp$$Register, $mem$$Address);
18920     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18921   %}
18922   ins_pipe( pipe_slow );
18923 %}
18924 
18925 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18926                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18927   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18928   match(Set dst (LoadVectorGather mem idx_base));
18929   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18930   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18931   ins_encode %{
18932     int vlen_enc = vector_length_encoding(this);
18933     int vector_len = Matcher::vector_length(this);
18934     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18935     __ lea($tmp$$Register, $mem$$Address);
18936     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18937     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18938                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18939   %}
18940   ins_pipe( pipe_slow );
18941 %}
18942 
18943 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18944   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18945   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18946   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18947   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18948   ins_encode %{
18949     int vlen_enc = vector_length_encoding(this);
18950     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18951     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18952     __ lea($tmp$$Register, $mem$$Address);
18953     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18954     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18955   %}
18956   ins_pipe( pipe_slow );
18957 %}
18958 
18959 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18960                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18961   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18962   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18963   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18964   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18965   ins_encode %{
18966     int vlen_enc = vector_length_encoding(this);
18967     int vector_len = Matcher::vector_length(this);
18968     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18969     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18970     __ lea($tmp$$Register, $mem$$Address);
18971     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18972     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18973     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18974                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18975   %}
18976   ins_pipe( pipe_slow );
18977 %}
18978 
18979 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18980   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18981   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18982   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18983   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18984   ins_encode %{
18985     int vlen_enc = vector_length_encoding(this);
18986     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18987     __ lea($tmp$$Register, $mem$$Address);
18988     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18989     if (elem_bt == T_SHORT) {
18990       __ movl($mask_idx$$Register, 0x55555555);
18991       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18992     }
18993     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18994     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18995   %}
18996   ins_pipe( pipe_slow );
18997 %}
18998 
18999 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
19000                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
19001   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
19002   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
19003   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
19004   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
19005   ins_encode %{
19006     int vlen_enc = vector_length_encoding(this);
19007     int vector_len = Matcher::vector_length(this);
19008     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19009     __ lea($tmp$$Register, $mem$$Address);
19010     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
19011     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
19012     if (elem_bt == T_SHORT) {
19013       __ movl($mask_idx$$Register, 0x55555555);
19014       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
19015     }
19016     __ xorl($mask_idx$$Register, $mask_idx$$Register);
19017     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
19018                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
19019   %}
19020   ins_pipe( pipe_slow );
19021 %}
19022 
19023 // ====================Scatter=======================================
19024 
19025 // Scatter INT, LONG, FLOAT, DOUBLE
19026 
19027 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
19028   predicate(UseAVX > 2);
19029   match(Set mem (StoreVectorScatter mem (Binary src idx)));
19030   effect(TEMP tmp, TEMP ktmp);
19031   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
19032   ins_encode %{
19033     int vlen_enc = vector_length_encoding(this, $src);
19034     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19035 
19036     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19037     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19038 
19039     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
19040     __ lea($tmp$$Register, $mem$$Address);
19041     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19042   %}
19043   ins_pipe( pipe_slow );
19044 %}
19045 
19046 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
19047   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
19048   effect(TEMP tmp, TEMP ktmp);
19049   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
19050   ins_encode %{
19051     int vlen_enc = vector_length_encoding(this, $src);
19052     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19053     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19054     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19055     // Note: Since scatter instruction partially updates the opmask register used
19056     // for predication hense moving mask operand to a temporary.
19057     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
19058     __ lea($tmp$$Register, $mem$$Address);
19059     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19060   %}
19061   ins_pipe( pipe_slow );
19062 %}
19063 
19064 // ====================REPLICATE=======================================
19065 
19066 // Replicate byte scalar to be vector
19067 instruct vReplB_reg(vec dst, rRegI src) %{
19068   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
19069   match(Set dst (Replicate src));
19070   format %{ "replicateB $dst,$src" %}
19071   ins_encode %{
19072     uint vlen = Matcher::vector_length(this);
19073     if (UseAVX >= 2) {
19074       int vlen_enc = vector_length_encoding(this);
19075       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19076         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
19077         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
19078       } else {
19079         __ movdl($dst$$XMMRegister, $src$$Register);
19080         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19081       }
19082     } else {
19083        assert(UseAVX < 2, "");
19084       __ movdl($dst$$XMMRegister, $src$$Register);
19085       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
19086       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19087       if (vlen >= 16) {
19088         assert(vlen == 16, "");
19089         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19090       }
19091     }
19092   %}
19093   ins_pipe( pipe_slow );
19094 %}
19095 
19096 instruct ReplB_mem(vec dst, memory mem) %{
19097   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
19098   match(Set dst (Replicate (LoadB mem)));
19099   format %{ "replicateB $dst,$mem" %}
19100   ins_encode %{
19101     int vlen_enc = vector_length_encoding(this);
19102     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
19103   %}
19104   ins_pipe( pipe_slow );
19105 %}
19106 
19107 // ====================ReplicateS=======================================
19108 
19109 instruct vReplS_reg(vec dst, rRegI src) %{
19110   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
19111   match(Set dst (Replicate src));
19112   format %{ "replicateS $dst,$src" %}
19113   ins_encode %{
19114     uint vlen = Matcher::vector_length(this);
19115     int vlen_enc = vector_length_encoding(this);
19116     if (UseAVX >= 2) {
19117       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19118         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
19119         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
19120       } else {
19121         __ movdl($dst$$XMMRegister, $src$$Register);
19122         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19123       }
19124     } else {
19125       assert(UseAVX < 2, "");
19126       __ movdl($dst$$XMMRegister, $src$$Register);
19127       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19128       if (vlen >= 8) {
19129         assert(vlen == 8, "");
19130         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19131       }
19132     }
19133   %}
19134   ins_pipe( pipe_slow );
19135 %}
19136 
19137 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
19138   match(Set dst (Replicate con));
19139   effect(TEMP rtmp);
19140   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
19141   ins_encode %{
19142     int vlen_enc = vector_length_encoding(this);
19143     BasicType bt = Matcher::vector_element_basic_type(this);
19144     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
19145     __ movl($rtmp$$Register, $con$$constant);
19146     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19147   %}
19148   ins_pipe( pipe_slow );
19149 %}
19150 
19151 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
19152   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
19153   match(Set dst (Replicate src));
19154   effect(TEMP rtmp);
19155   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
19156   ins_encode %{
19157     int vlen_enc = vector_length_encoding(this);
19158     __ evmovw($rtmp$$Register, $src$$XMMRegister);
19159     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19160   %}
19161   ins_pipe( pipe_slow );
19162 %}
19163 
19164 instruct ReplS_mem(vec dst, memory mem) %{
19165   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19166   match(Set dst (Replicate (LoadS mem)));
19167   format %{ "replicateS $dst,$mem" %}
19168   ins_encode %{
19169     int vlen_enc = vector_length_encoding(this);
19170     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19171   %}
19172   ins_pipe( pipe_slow );
19173 %}
19174 
19175 // ====================ReplicateI=======================================
19176 
19177 instruct ReplI_reg(vec dst, rRegI src) %{
19178   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19179   match(Set dst (Replicate src));
19180   format %{ "replicateI $dst,$src" %}
19181   ins_encode %{
19182     uint vlen = Matcher::vector_length(this);
19183     int vlen_enc = vector_length_encoding(this);
19184     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19185       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19186     } else if (VM_Version::supports_avx2()) {
19187       __ movdl($dst$$XMMRegister, $src$$Register);
19188       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19189     } else {
19190       __ movdl($dst$$XMMRegister, $src$$Register);
19191       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19192     }
19193   %}
19194   ins_pipe( pipe_slow );
19195 %}
19196 
19197 instruct ReplI_mem(vec dst, memory mem) %{
19198   predicate(Matcher::vector_element_basic_type(n) == T_INT);
19199   match(Set dst (Replicate (LoadI mem)));
19200   format %{ "replicateI $dst,$mem" %}
19201   ins_encode %{
19202     int vlen_enc = vector_length_encoding(this);
19203     if (VM_Version::supports_avx2()) {
19204       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19205     } else if (VM_Version::supports_avx()) {
19206       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19207     } else {
19208       __ movdl($dst$$XMMRegister, $mem$$Address);
19209       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19210     }
19211   %}
19212   ins_pipe( pipe_slow );
19213 %}
19214 
19215 instruct ReplI_imm(vec dst, immI con) %{
19216   predicate(Matcher::is_non_long_integral_vector(n));
19217   match(Set dst (Replicate con));
19218   format %{ "replicateI $dst,$con" %}
19219   ins_encode %{
19220     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19221                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19222                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
19223     BasicType bt = Matcher::vector_element_basic_type(this);
19224     int vlen = Matcher::vector_length_in_bytes(this);
19225     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19226   %}
19227   ins_pipe( pipe_slow );
19228 %}
19229 
19230 // Replicate scalar zero to be vector
19231 instruct ReplI_zero(vec dst, immI_0 zero) %{
19232   predicate(Matcher::is_non_long_integral_vector(n));
19233   match(Set dst (Replicate zero));
19234   format %{ "replicateI $dst,$zero" %}
19235   ins_encode %{
19236     int vlen_enc = vector_length_encoding(this);
19237     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19238       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19239     } else {
19240       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19241     }
19242   %}
19243   ins_pipe( fpu_reg_reg );
19244 %}
19245 
19246 instruct ReplI_M1(vec dst, immI_M1 con) %{
19247   predicate(Matcher::is_non_long_integral_vector(n));
19248   match(Set dst (Replicate con));
19249   format %{ "vallones $dst" %}
19250   ins_encode %{
19251     int vector_len = vector_length_encoding(this);
19252     __ vallones($dst$$XMMRegister, vector_len);
19253   %}
19254   ins_pipe( pipe_slow );
19255 %}
19256 
19257 // ====================ReplicateL=======================================
19258 
19259 // Replicate long (8 byte) scalar to be vector
19260 instruct ReplL_reg(vec dst, rRegL src) %{
19261   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19262   match(Set dst (Replicate src));
19263   format %{ "replicateL $dst,$src" %}
19264   ins_encode %{
19265     int vlen = Matcher::vector_length(this);
19266     int vlen_enc = vector_length_encoding(this);
19267     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19268       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19269     } else if (VM_Version::supports_avx2()) {
19270       __ movdq($dst$$XMMRegister, $src$$Register);
19271       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19272     } else {
19273       __ movdq($dst$$XMMRegister, $src$$Register);
19274       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19275     }
19276   %}
19277   ins_pipe( pipe_slow );
19278 %}
19279 
19280 instruct ReplL_mem(vec dst, memory mem) %{
19281   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19282   match(Set dst (Replicate (LoadL mem)));
19283   format %{ "replicateL $dst,$mem" %}
19284   ins_encode %{
19285     int vlen_enc = vector_length_encoding(this);
19286     if (VM_Version::supports_avx2()) {
19287       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19288     } else if (VM_Version::supports_sse3()) {
19289       __ movddup($dst$$XMMRegister, $mem$$Address);
19290     } else {
19291       __ movq($dst$$XMMRegister, $mem$$Address);
19292       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19293     }
19294   %}
19295   ins_pipe( pipe_slow );
19296 %}
19297 
19298 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19299 instruct ReplL_imm(vec dst, immL con) %{
19300   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19301   match(Set dst (Replicate con));
19302   format %{ "replicateL $dst,$con" %}
19303   ins_encode %{
19304     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19305     int vlen = Matcher::vector_length_in_bytes(this);
19306     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19307   %}
19308   ins_pipe( pipe_slow );
19309 %}
19310 
19311 instruct ReplL_zero(vec dst, immL0 zero) %{
19312   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19313   match(Set dst (Replicate zero));
19314   format %{ "replicateL $dst,$zero" %}
19315   ins_encode %{
19316     int vlen_enc = vector_length_encoding(this);
19317     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19318       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19319     } else {
19320       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19321     }
19322   %}
19323   ins_pipe( fpu_reg_reg );
19324 %}
19325 
19326 instruct ReplL_M1(vec dst, immL_M1 con) %{
19327   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19328   match(Set dst (Replicate con));
19329   format %{ "vallones $dst" %}
19330   ins_encode %{
19331     int vector_len = vector_length_encoding(this);
19332     __ vallones($dst$$XMMRegister, vector_len);
19333   %}
19334   ins_pipe( pipe_slow );
19335 %}
19336 
19337 // ====================ReplicateF=======================================
19338 
19339 instruct vReplF_reg(vec dst, vlRegF src) %{
19340   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19341   match(Set dst (Replicate src));
19342   format %{ "replicateF $dst,$src" %}
19343   ins_encode %{
19344     uint vlen = Matcher::vector_length(this);
19345     int vlen_enc = vector_length_encoding(this);
19346     if (vlen <= 4) {
19347       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19348     } else if (VM_Version::supports_avx2()) {
19349       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19350     } else {
19351       assert(vlen == 8, "sanity");
19352       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19353       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19354     }
19355   %}
19356   ins_pipe( pipe_slow );
19357 %}
19358 
19359 instruct ReplF_reg(vec dst, vlRegF src) %{
19360   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19361   match(Set dst (Replicate src));
19362   format %{ "replicateF $dst,$src" %}
19363   ins_encode %{
19364     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19365   %}
19366   ins_pipe( pipe_slow );
19367 %}
19368 
19369 instruct ReplF_mem(vec dst, memory mem) %{
19370   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19371   match(Set dst (Replicate (LoadF mem)));
19372   format %{ "replicateF $dst,$mem" %}
19373   ins_encode %{
19374     int vlen_enc = vector_length_encoding(this);
19375     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19376   %}
19377   ins_pipe( pipe_slow );
19378 %}
19379 
19380 // Replicate float scalar immediate to be vector by loading from const table.
19381 instruct ReplF_imm(vec dst, immF con) %{
19382   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19383   match(Set dst (Replicate con));
19384   format %{ "replicateF $dst,$con" %}
19385   ins_encode %{
19386     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19387                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19388     int vlen = Matcher::vector_length_in_bytes(this);
19389     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19390   %}
19391   ins_pipe( pipe_slow );
19392 %}
19393 
19394 instruct ReplF_zero(vec dst, immF0 zero) %{
19395   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19396   match(Set dst (Replicate zero));
19397   format %{ "replicateF $dst,$zero" %}
19398   ins_encode %{
19399     int vlen_enc = vector_length_encoding(this);
19400     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19401       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19402     } else {
19403       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19404     }
19405   %}
19406   ins_pipe( fpu_reg_reg );
19407 %}
19408 
19409 // ====================ReplicateD=======================================
19410 
19411 // Replicate double (8 bytes) scalar to be vector
19412 instruct vReplD_reg(vec dst, vlRegD src) %{
19413   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19414   match(Set dst (Replicate src));
19415   format %{ "replicateD $dst,$src" %}
19416   ins_encode %{
19417     uint vlen = Matcher::vector_length(this);
19418     int vlen_enc = vector_length_encoding(this);
19419     if (vlen <= 2) {
19420       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19421     } else if (VM_Version::supports_avx2()) {
19422       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19423     } else {
19424       assert(vlen == 4, "sanity");
19425       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19426       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19427     }
19428   %}
19429   ins_pipe( pipe_slow );
19430 %}
19431 
19432 instruct ReplD_reg(vec dst, vlRegD src) %{
19433   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19434   match(Set dst (Replicate src));
19435   format %{ "replicateD $dst,$src" %}
19436   ins_encode %{
19437     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19438   %}
19439   ins_pipe( pipe_slow );
19440 %}
19441 
19442 instruct ReplD_mem(vec dst, memory mem) %{
19443   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19444   match(Set dst (Replicate (LoadD mem)));
19445   format %{ "replicateD $dst,$mem" %}
19446   ins_encode %{
19447     if (Matcher::vector_length(this) >= 4) {
19448       int vlen_enc = vector_length_encoding(this);
19449       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19450     } else {
19451       __ movddup($dst$$XMMRegister, $mem$$Address);
19452     }
19453   %}
19454   ins_pipe( pipe_slow );
19455 %}
19456 
19457 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19458 instruct ReplD_imm(vec dst, immD con) %{
19459   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19460   match(Set dst (Replicate con));
19461   format %{ "replicateD $dst,$con" %}
19462   ins_encode %{
19463     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19464     int vlen = Matcher::vector_length_in_bytes(this);
19465     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19466   %}
19467   ins_pipe( pipe_slow );
19468 %}
19469 
19470 instruct ReplD_zero(vec dst, immD0 zero) %{
19471   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19472   match(Set dst (Replicate zero));
19473   format %{ "replicateD $dst,$zero" %}
19474   ins_encode %{
19475     int vlen_enc = vector_length_encoding(this);
19476     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19477       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19478     } else {
19479       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19480     }
19481   %}
19482   ins_pipe( fpu_reg_reg );
19483 %}
19484 
19485 // ====================VECTOR INSERT=======================================
19486 
19487 instruct insert(vec dst, rRegI val, immU8 idx) %{
19488   predicate(Matcher::vector_length_in_bytes(n) < 32);
19489   match(Set dst (VectorInsert (Binary dst val) idx));
19490   format %{ "vector_insert $dst,$val,$idx" %}
19491   ins_encode %{
19492     assert(UseSSE >= 4, "required");
19493     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19494 
19495     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19496 
19497     assert(is_integral_type(elem_bt), "");
19498     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19499 
19500     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19501   %}
19502   ins_pipe( pipe_slow );
19503 %}
19504 
19505 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19506   predicate(Matcher::vector_length_in_bytes(n) == 32);
19507   match(Set dst (VectorInsert (Binary src val) idx));
19508   effect(TEMP vtmp);
19509   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19510   ins_encode %{
19511     int vlen_enc = Assembler::AVX_256bit;
19512     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19513     int elem_per_lane = 16/type2aelembytes(elem_bt);
19514     int log2epr = log2(elem_per_lane);
19515 
19516     assert(is_integral_type(elem_bt), "sanity");
19517     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19518 
19519     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19520     uint y_idx = ($idx$$constant >> log2epr) & 1;
19521     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19522     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19523     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19524   %}
19525   ins_pipe( pipe_slow );
19526 %}
19527 
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  // VectorInsert of an integral scalar into a 64-byte vector at constant index idx.
  // The 128-bit lane containing the element is pulled out of src into vtmp,
  // updated with val, and then written into dst together with the rest of src.
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType bt = Matcher::vector_element_basic_type(this);
    // Elements held by one 16-byte lane, and the shift that divides by that count.
    int lane_elems = 16/type2aelembytes(bt);
    int lane_shift = log2(lane_elems);

    assert(is_integral_type(bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Split the index into a position within the lane and a lane number (0..3).
    uint pos_in_lane = $idx$$constant & right_n_bits(lane_shift);
    uint lane_no = ($idx$$constant >> lane_shift) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, lane_no);
    __ vinsert(bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, pos_in_lane);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
19551 
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  // In-place VectorInsert for two-element vectors: dst is both the source
  // vector and the result, and val is written at constant index idx.
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    // Sanity checks on CPU feature level, element type and index range.
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // The whole vector fits one 128-bit register, so a single pinsrq is emitted.
    __ pinsrq($dst$$XMMRegister,
              $val$$Register,
              $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19565 
instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  // Inserts the long val at constant index idx of a four-element vector.
  // The 128-bit half that holds the element is extracted from src into vtmp,
  // patched with vpinsrq, and then combined with src to form dst.
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two longs per 128-bit lane: bit 0 of idx is the slot, bit 1 is the lane.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19584 
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  // VectorInsert of a long into an eight-element vector at constant index idx.
  // One 128-bit lane of src is staged in vtmp, updated, and merged into dst.
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two longs per lane: the low index bit picks the slot, the next two bits the lane.
    uint slot_in_lane = $idx$$constant & right_n_bits(1);
    uint lane_no = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, lane_no);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, slot_in_lane);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
19602 
instruct insertF(vec dst, regF val, immU8 idx) %{
  // In-place VectorInsert of a float for vectors with fewer than eight
  // elements: dst is both the source vector and the result.
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Only the low two index bits are used; they are shifted left by four so
    // that they occupy bits 4..5 of the immediate handed to insertps.
    uint dst_slot = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, dst_slot << 4);
  %}
  ins_pipe( pipe_slow );
%}
19618 
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  // Inserts the float val at constant index idx of a vector with eight or more
  // elements. The 128-bit lane that holds the element is extracted from src
  // into vtmp, patched with vinsertps, and then combined with src to form dst.
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    // Four floats per 128-bit lane: the low two index bits are the slot in the lane.
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      // Eight elements: two lanes, selected by one bit.
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      // Sixteen elements: four lanes, selected by two bits.
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
19646 
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  // In-place VectorInsert of a double into a two-element vector. The double is
  // first copied into the general purpose register tmp and inserted from there.
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    // Sanity checks on CPU feature level, element type and index range.
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Copy the 64 bits of val into tmp, then insert tmp at the requested slot.
    __ movq($tmp$$Register,
            $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister,
              $tmp$$Register,
              $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19662 
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  // Inserts the double val at constant index idx of a four-element vector.
  // val is copied into the general purpose register tmp; the 128-bit half of
  // src that holds the element is extracted into vtmp, patched with vpinsrq,
  // and then combined with src to form dst.
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two doubles per 128-bit lane: bit 0 of idx is the slot, bit 1 is the lane.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19682 
// NOTE(review): idx is declared immI here while the sibling insert rules use
// immU8 -- confirm whether that difference is intentional.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  // Inserts the double val at constant index idx of an eight-element vector.
  // val is copied into the general purpose register tmp; the 128-bit lane of
  // src that holds the element is extracted into vtmp, patched with vpinsrq,
  // and then combined with src to form dst.
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  // Both temporaries are listed so the printed form matches the effect list.
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two doubles per 128-bit lane: bit 0 of idx is the slot, bits 1..2 the lane.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19701 
19702 // ====================REDUCTION ARITHMETIC=======================================
19703 
19704 // =======================Int Reduction==========================================
19705 
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Reduction over a vector of int elements. src1 is the scalar input, src2 the
  // vector input; the ideal opcode of the matched node tells reduceI which of
  // the operations listed below to emit.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count comes from the vector operand, not from the scalar result.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ reduceI(ideal_opc, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19726 
19727 // =======================Long Reduction==========================================
19728 
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Reduction over a vector of long elements, selected when AVX512DQ is not
  // supported. Vector operands and temporaries use the legVec operand class.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // reduceL picks the concrete operation from the ideal opcode.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ reduceL(ideal_opc, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19749 
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Reduction over a vector of long elements, selected when AVX512DQ is
  // supported. Vector operands and temporaries use the vec operand class.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // Same encoder call as the rule used without AVX512DQ.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ reduceL(ideal_opc, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19770 
19771 // =======================Float Reduction==========================================
19772 
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  // Strictly ordered float add/mul reduction for vectors of at most four
  // elements. dst is both the scalar input and the result.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    // Add versus multiply is decided by the ideal opcode inside reduce_fp.
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_opc = this->ideal_Opcode();
    __ reduce_fp(ideal_opc, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19786 
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  // Strictly ordered float add/mul reduction for eight-element vectors.
  // dst is both the scalar input and the result; two vector temporaries are used.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_opc = this->ideal_Opcode();
    __ reduce_fp(ideal_opc, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19800 
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  // Strictly ordered float add/mul reduction for sixteen-element vectors.
  // dst is both the scalar input and the result; vector operands are legVec.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_opc = this->ideal_Opcode();
    __ reduce_fp(ideal_opc, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19814 
19815 
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Float add/mul reduction without the strict ordering requirement, for
  // two-element vectors. Intended for the VectorAPI, which permits such
  // reductions. src1 holds the reduction identity and is not handed to the
  // encoder call below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_opc, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19832 
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Float add/mul reduction without the strict ordering requirement, for
  // four-element vectors. Intended for the VectorAPI, which permits such
  // reductions. src1 holds the reduction identity and is not handed to the
  // encoder call below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_opc, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19849 
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Float add/mul reduction without the strict ordering requirement, for
  // eight-element vectors. Intended for the VectorAPI, which permits such
  // reductions. src1 holds the reduction identity and is not handed to the
  // encoder call below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_opc, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19866 
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Float add/mul reduction without the strict ordering requirement, for
  // sixteen-element vectors. Intended for the VectorAPI, which permits such
  // reductions. src1 holds the reduction identity and is not handed to the
  // encoder call below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_opc, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19883 
19884 // =======================Double Reduction==========================================
19885 
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  // Strictly ordered double add/mul reduction for two-element vectors.
  // dst is both the scalar input and the result.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    // Add versus multiply is decided by the ideal opcode inside reduce_fp.
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_opc = this->ideal_Opcode();
    __ reduce_fp(ideal_opc, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19899 
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  // Strictly ordered double add/mul reduction for four-element vectors.
  // dst is both the scalar input and the result; two vector temporaries are used.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_opc = this->ideal_Opcode();
    __ reduce_fp(ideal_opc, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19913 
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  // Strictly ordered double add/mul reduction for eight-element vectors.
  // dst is both the scalar input and the result; vector operands are legVec.
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_opc = this->ideal_Opcode();
    __ reduce_fp(ideal_opc, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19927 
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Double add/mul reduction without the strict ordering requirement, for
  // two-element vectors. Intended for the VectorAPI, which permits such
  // reductions. src1 holds the reduction identity and is not handed to the
  // encoder call below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_opc, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19944 
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Double add/mul reduction without the strict ordering requirement, for
  // four-element vectors. Intended for the VectorAPI, which permits such
  // reductions. src1 holds the reduction identity and is not handed to the
  // encoder call below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_opc, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19961 
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Double add/mul reduction without the strict ordering requirement, for
  // eight-element vectors. Intended for the VectorAPI, which permits such
  // reductions. src1 holds the reduction identity and is not handed to the
  // encoder call below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_opc, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19978 
19979 // =======================Byte Reduction==========================================
19980 
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Reduction over a vector of byte elements, selected when AVX512BW is not
  // supported. MulReductionVI is not among the operations matched here.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // reduceB picks the concrete operation from the ideal opcode.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ reduceB(ideal_opc, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20000 
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Reduction over a vector of byte elements, selected when AVX512BW is
  // supported. Vector operands and temporaries use the vec operand class.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // Same encoder call as the rule used without AVX512BW.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ reduceB(ideal_opc, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20020 
20021 // =======================Short Reduction==========================================
20022 
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Reduction over a vector of short elements. src1 is the scalar input, src2
  // the vector input; reduceS receives the ideal opcode of the matched node.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ reduceS(ideal_opc, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20043 
20044 // =======================Mul Reduction==========================================
20045 
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Multiply reduction over a byte vector of at most 32 elements.
  // dst is declared TEMP in addition to the two vector temporaries.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ mulreduceB(ideal_opc, num_elems,
                  $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20059 
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Multiply reduction over a byte vector of exactly 64 elements.
  // Vector operands and temporaries use the legVec operand class.
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    __ mulreduceB(ideal_opc, num_elems,
                  $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20073 
20074 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  // Float min/max reduction of a two-element vector, used when AVX10.2 is not
  // supported. src1 must be the immediate +Inf for a min reduction or -Inf for
  // a max reduction; it is not handed to the encoder call below.
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    // Third argument is false in the rules that take an immediate src1.
    __ reduceFloatMinMax(ideal_opc, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20096 
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  // Float min/max reduction of a vector with four or more elements, used when
  // AVX10.2 is not supported. src1 must be the immediate +Inf for a min
  // reduction or -Inf for a max reduction; it is not handed to the encoder.
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_opc = this->ideal_Opcode();
    // Third argument is false in the rules that take an immediate src1.
    __ reduceFloatMinMax(ideal_opc, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20117 
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  // Float min/max reduction of a two-element vector where dst is also the
  // scalar input of the matched node. Used when AVX10.2 is not supported.
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_opc = this->ideal_Opcode();
    // Third argument is true here, unlike the rules that take an immediate src1.
    // NOTE(review): presumably it tells the helper that dst carries a live
    // input value -- confirm against reduceFloatMinMax.
    __ reduceFloatMinMax(ideal_opc, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20136 
20137 
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  // Float min/max reduction of a vector with four or more elements where dst
  // is also the scalar input of the matched node. Used when AVX10.2 is not
  // supported.
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_opc = this->ideal_Opcode();
    // Third argument is true here, unlike the rules that take an immediate src1.
    // NOTE(review): presumably it tells the helper that dst carries a live
    // input value -- confirm against reduceFloatMinMax.
    __ reduceFloatMinMax(ideal_opc, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20156 
20157 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
20158   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20159             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20160              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20161             Matcher::vector_length(n->in(2)) == 2);
20162   match(Set dst (MinReductionV src1 src2));
20163   match(Set dst (MaxReductionV src1 src2));
20164   effect(TEMP dst, TEMP xtmp1);
20165   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
20166   ins_encode %{
20167     int opcode = this->ideal_Opcode();
20168     int vlen = Matcher::vector_length(this, $src2);
20169     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20170                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20171   %}
20172   ins_pipe( pipe_slow );
20173 %}
20174 
20175 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
20176   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20177             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20178              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20179             Matcher::vector_length(n->in(2)) >= 4);
20180   match(Set dst (MinReductionV src1 src2));
20181   match(Set dst (MaxReductionV src1 src2));
20182   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20183   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20184   ins_encode %{
20185     int opcode = this->ideal_Opcode();
20186     int vlen = Matcher::vector_length(this, $src2);
20187     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20188                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20189   %}
20190   ins_pipe( pipe_slow );
20191 %}
20192 
20193 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20194   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20195             Matcher::vector_length(n->in(2)) == 2);
20196   match(Set dst (MinReductionV dst src));
20197   match(Set dst (MaxReductionV dst src));
20198   effect(TEMP dst, TEMP xtmp1);
20199   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20200   ins_encode %{
20201     int opcode = this->ideal_Opcode();
20202     int vlen = Matcher::vector_length(this, $src);
20203     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20204                          $xtmp1$$XMMRegister);
20205   %}
20206   ins_pipe( pipe_slow );
20207 %}
20208 
20209 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20210   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20211             Matcher::vector_length(n->in(2)) >= 4);
20212   match(Set dst (MinReductionV dst src));
20213   match(Set dst (MaxReductionV dst src));
20214   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20215   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20216   ins_encode %{
20217     int opcode = this->ideal_Opcode();
20218     int vlen = Matcher::vector_length(this, $src);
20219     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20220                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20221   %}
20222   ins_pipe( pipe_slow );
20223 %}
20224 
//--------------------Min/Max Double Reduction --------------------
20226 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20227                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20228   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20229             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20230              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20231             Matcher::vector_length(n->in(2)) == 2);
20232   match(Set dst (MinReductionV src1 src2));
20233   match(Set dst (MaxReductionV src1 src2));
20234   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20235   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20236   ins_encode %{
20237     assert(UseAVX > 0, "sanity");
20238 
20239     int opcode = this->ideal_Opcode();
20240     int vlen = Matcher::vector_length(this, $src2);
20241     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20242                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20243   %}
20244   ins_pipe( pipe_slow );
20245 %}
20246 
20247 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20248                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20249   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20250             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20251              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20252             Matcher::vector_length(n->in(2)) >= 4);
20253   match(Set dst (MinReductionV src1 src2));
20254   match(Set dst (MaxReductionV src1 src2));
20255   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20256   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20257   ins_encode %{
20258     assert(UseAVX > 0, "sanity");
20259 
20260     int opcode = this->ideal_Opcode();
20261     int vlen = Matcher::vector_length(this, $src2);
20262     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20263                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20264   %}
20265   ins_pipe( pipe_slow );
20266 %}
20267 
20268 
20269 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20270                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20271   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20272             Matcher::vector_length(n->in(2)) == 2);
20273   match(Set dst (MinReductionV dst src));
20274   match(Set dst (MaxReductionV dst src));
20275   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20276   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20277   ins_encode %{
20278     assert(UseAVX > 0, "sanity");
20279 
20280     int opcode = this->ideal_Opcode();
20281     int vlen = Matcher::vector_length(this, $src);
20282     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20283                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20284   %}
20285   ins_pipe( pipe_slow );
20286 %}
20287 
20288 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20289                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20290   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20291             Matcher::vector_length(n->in(2)) >= 4);
20292   match(Set dst (MinReductionV dst src));
20293   match(Set dst (MaxReductionV dst src));
20294   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20295   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20296   ins_encode %{
20297     assert(UseAVX > 0, "sanity");
20298 
20299     int opcode = this->ideal_Opcode();
20300     int vlen = Matcher::vector_length(this, $src);
20301     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20302                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20303   %}
20304   ins_pipe( pipe_slow );
20305 %}
20306 
20307 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20308   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20309             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20310              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20311             Matcher::vector_length(n->in(2)) == 2);
20312   match(Set dst (MinReductionV src1 src2));
20313   match(Set dst (MaxReductionV src1 src2));
20314   effect(TEMP dst, TEMP xtmp1);
20315   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20316   ins_encode %{
20317     int opcode = this->ideal_Opcode();
20318     int vlen = Matcher::vector_length(this, $src2);
20319     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20320                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20321   %}
20322   ins_pipe( pipe_slow );
20323 %}
20324 
20325 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20326   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20327             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20328              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20329             Matcher::vector_length(n->in(2)) >= 4);
20330   match(Set dst (MinReductionV src1 src2));
20331   match(Set dst (MaxReductionV src1 src2));
20332   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20333   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20334   ins_encode %{
20335     int opcode = this->ideal_Opcode();
20336     int vlen = Matcher::vector_length(this, $src2);
20337     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20338                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20339   %}
20340   ins_pipe( pipe_slow );
20341 %}
20342 
20343 
20344 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20345   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20346             Matcher::vector_length(n->in(2)) == 2);
20347   match(Set dst (MinReductionV dst src));
20348   match(Set dst (MaxReductionV dst src));
20349   effect(TEMP dst, TEMP xtmp1);
20350   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20351   ins_encode %{
20352     int opcode = this->ideal_Opcode();
20353     int vlen = Matcher::vector_length(this, $src);
20354     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20355                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20356   %}
20357   ins_pipe( pipe_slow );
20358 %}
20359 
20360 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20361   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20362             Matcher::vector_length(n->in(2)) >= 4);
20363   match(Set dst (MinReductionV dst src));
20364   match(Set dst (MaxReductionV dst src));
20365   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20366   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20367   ins_encode %{
20368     int opcode = this->ideal_Opcode();
20369     int vlen = Matcher::vector_length(this, $src);
20370     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20371                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20372   %}
20373   ins_pipe( pipe_slow );
20374 %}
20375 
20376 // ====================VECTOR ARITHMETIC=======================================
20377 
20378 // --------------------------------- ADD --------------------------------------
20379 
20380 // Bytes vector add
20381 instruct vaddB(vec dst, vec src) %{
20382   predicate(UseAVX == 0);
20383   match(Set dst (AddVB dst src));
20384   format %{ "paddb   $dst,$src\t! add packedB" %}
20385   ins_encode %{
20386     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20387   %}
20388   ins_pipe( pipe_slow );
20389 %}
20390 
20391 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20392   predicate(UseAVX > 0);
20393   match(Set dst (AddVB src1 src2));
20394   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20395   ins_encode %{
20396     int vlen_enc = vector_length_encoding(this);
20397     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20398   %}
20399   ins_pipe( pipe_slow );
20400 %}
20401 
20402 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20403   predicate((UseAVX > 0) &&
20404             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20405   match(Set dst (AddVB src (LoadVector mem)));
20406   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20407   ins_encode %{
20408     int vlen_enc = vector_length_encoding(this);
20409     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20410   %}
20411   ins_pipe( pipe_slow );
20412 %}
20413 
20414 // Shorts/Chars vector add
20415 instruct vaddS(vec dst, vec src) %{
20416   predicate(UseAVX == 0);
20417   match(Set dst (AddVS dst src));
20418   format %{ "paddw   $dst,$src\t! add packedS" %}
20419   ins_encode %{
20420     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20421   %}
20422   ins_pipe( pipe_slow );
20423 %}
20424 
20425 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20426   predicate(UseAVX > 0);
20427   match(Set dst (AddVS src1 src2));
20428   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20429   ins_encode %{
20430     int vlen_enc = vector_length_encoding(this);
20431     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20432   %}
20433   ins_pipe( pipe_slow );
20434 %}
20435 
20436 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20437   predicate((UseAVX > 0) &&
20438             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20439   match(Set dst (AddVS src (LoadVector mem)));
20440   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20441   ins_encode %{
20442     int vlen_enc = vector_length_encoding(this);
20443     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20444   %}
20445   ins_pipe( pipe_slow );
20446 %}
20447 
20448 // Integers vector add
20449 instruct vaddI(vec dst, vec src) %{
20450   predicate(UseAVX == 0);
20451   match(Set dst (AddVI dst src));
20452   format %{ "paddd   $dst,$src\t! add packedI" %}
20453   ins_encode %{
20454     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20455   %}
20456   ins_pipe( pipe_slow );
20457 %}
20458 
20459 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20460   predicate(UseAVX > 0);
20461   match(Set dst (AddVI src1 src2));
20462   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20463   ins_encode %{
20464     int vlen_enc = vector_length_encoding(this);
20465     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20466   %}
20467   ins_pipe( pipe_slow );
20468 %}
20469 
20470 
20471 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20472   predicate((UseAVX > 0) &&
20473             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20474   match(Set dst (AddVI src (LoadVector mem)));
20475   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20476   ins_encode %{
20477     int vlen_enc = vector_length_encoding(this);
20478     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20479   %}
20480   ins_pipe( pipe_slow );
20481 %}
20482 
20483 // Longs vector add
20484 instruct vaddL(vec dst, vec src) %{
20485   predicate(UseAVX == 0);
20486   match(Set dst (AddVL dst src));
20487   format %{ "paddq   $dst,$src\t! add packedL" %}
20488   ins_encode %{
20489     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20490   %}
20491   ins_pipe( pipe_slow );
20492 %}
20493 
20494 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20495   predicate(UseAVX > 0);
20496   match(Set dst (AddVL src1 src2));
20497   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20498   ins_encode %{
20499     int vlen_enc = vector_length_encoding(this);
20500     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20501   %}
20502   ins_pipe( pipe_slow );
20503 %}
20504 
20505 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20506   predicate((UseAVX > 0) &&
20507             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20508   match(Set dst (AddVL src (LoadVector mem)));
20509   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20510   ins_encode %{
20511     int vlen_enc = vector_length_encoding(this);
20512     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20513   %}
20514   ins_pipe( pipe_slow );
20515 %}
20516 
20517 // Floats vector add
20518 instruct vaddF(vec dst, vec src) %{
20519   predicate(UseAVX == 0);
20520   match(Set dst (AddVF dst src));
20521   format %{ "addps   $dst,$src\t! add packedF" %}
20522   ins_encode %{
20523     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20524   %}
20525   ins_pipe( pipe_slow );
20526 %}
20527 
20528 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20529   predicate(UseAVX > 0);
20530   match(Set dst (AddVF src1 src2));
20531   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20532   ins_encode %{
20533     int vlen_enc = vector_length_encoding(this);
20534     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20535   %}
20536   ins_pipe( pipe_slow );
20537 %}
20538 
20539 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20540   predicate((UseAVX > 0) &&
20541             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20542   match(Set dst (AddVF src (LoadVector mem)));
20543   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20544   ins_encode %{
20545     int vlen_enc = vector_length_encoding(this);
20546     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20547   %}
20548   ins_pipe( pipe_slow );
20549 %}
20550 
20551 // Doubles vector add
20552 instruct vaddD(vec dst, vec src) %{
20553   predicate(UseAVX == 0);
20554   match(Set dst (AddVD dst src));
20555   format %{ "addpd   $dst,$src\t! add packedD" %}
20556   ins_encode %{
20557     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20558   %}
20559   ins_pipe( pipe_slow );
20560 %}
20561 
20562 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20563   predicate(UseAVX > 0);
20564   match(Set dst (AddVD src1 src2));
20565   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20566   ins_encode %{
20567     int vlen_enc = vector_length_encoding(this);
20568     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20569   %}
20570   ins_pipe( pipe_slow );
20571 %}
20572 
20573 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20574   predicate((UseAVX > 0) &&
20575             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20576   match(Set dst (AddVD src (LoadVector mem)));
20577   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20578   ins_encode %{
20579     int vlen_enc = vector_length_encoding(this);
20580     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20581   %}
20582   ins_pipe( pipe_slow );
20583 %}
20584 
20585 // --------------------------------- SUB --------------------------------------
20586 
20587 // Bytes vector sub
20588 instruct vsubB(vec dst, vec src) %{
20589   predicate(UseAVX == 0);
20590   match(Set dst (SubVB dst src));
20591   format %{ "psubb   $dst,$src\t! sub packedB" %}
20592   ins_encode %{
20593     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20594   %}
20595   ins_pipe( pipe_slow );
20596 %}
20597 
20598 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20599   predicate(UseAVX > 0);
20600   match(Set dst (SubVB src1 src2));
20601   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20602   ins_encode %{
20603     int vlen_enc = vector_length_encoding(this);
20604     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20605   %}
20606   ins_pipe( pipe_slow );
20607 %}
20608 
20609 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20610   predicate((UseAVX > 0) &&
20611             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20612   match(Set dst (SubVB src (LoadVector mem)));
20613   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20614   ins_encode %{
20615     int vlen_enc = vector_length_encoding(this);
20616     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20617   %}
20618   ins_pipe( pipe_slow );
20619 %}
20620 
20621 // Shorts/Chars vector sub
20622 instruct vsubS(vec dst, vec src) %{
20623   predicate(UseAVX == 0);
20624   match(Set dst (SubVS dst src));
20625   format %{ "psubw   $dst,$src\t! sub packedS" %}
20626   ins_encode %{
20627     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20628   %}
20629   ins_pipe( pipe_slow );
20630 %}
20631 
20632 
20633 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20634   predicate(UseAVX > 0);
20635   match(Set dst (SubVS src1 src2));
20636   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20637   ins_encode %{
20638     int vlen_enc = vector_length_encoding(this);
20639     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20640   %}
20641   ins_pipe( pipe_slow );
20642 %}
20643 
20644 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20645   predicate((UseAVX > 0) &&
20646             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20647   match(Set dst (SubVS src (LoadVector mem)));
20648   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20649   ins_encode %{
20650     int vlen_enc = vector_length_encoding(this);
20651     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20652   %}
20653   ins_pipe( pipe_slow );
20654 %}
20655 
20656 // Integers vector sub
20657 instruct vsubI(vec dst, vec src) %{
20658   predicate(UseAVX == 0);
20659   match(Set dst (SubVI dst src));
20660   format %{ "psubd   $dst,$src\t! sub packedI" %}
20661   ins_encode %{
20662     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20663   %}
20664   ins_pipe( pipe_slow );
20665 %}
20666 
20667 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20668   predicate(UseAVX > 0);
20669   match(Set dst (SubVI src1 src2));
20670   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20671   ins_encode %{
20672     int vlen_enc = vector_length_encoding(this);
20673     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20674   %}
20675   ins_pipe( pipe_slow );
20676 %}
20677 
20678 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20679   predicate((UseAVX > 0) &&
20680             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20681   match(Set dst (SubVI src (LoadVector mem)));
20682   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20683   ins_encode %{
20684     int vlen_enc = vector_length_encoding(this);
20685     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20686   %}
20687   ins_pipe( pipe_slow );
20688 %}
20689 
20690 // Longs vector sub
20691 instruct vsubL(vec dst, vec src) %{
20692   predicate(UseAVX == 0);
20693   match(Set dst (SubVL dst src));
20694   format %{ "psubq   $dst,$src\t! sub packedL" %}
20695   ins_encode %{
20696     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20697   %}
20698   ins_pipe( pipe_slow );
20699 %}
20700 
20701 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20702   predicate(UseAVX > 0);
20703   match(Set dst (SubVL src1 src2));
20704   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20705   ins_encode %{
20706     int vlen_enc = vector_length_encoding(this);
20707     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20708   %}
20709   ins_pipe( pipe_slow );
20710 %}
20711 
20712 
20713 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20714   predicate((UseAVX > 0) &&
20715             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20716   match(Set dst (SubVL src (LoadVector mem)));
20717   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20718   ins_encode %{
20719     int vlen_enc = vector_length_encoding(this);
20720     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20721   %}
20722   ins_pipe( pipe_slow );
20723 %}
20724 
20725 // Floats vector sub
20726 instruct vsubF(vec dst, vec src) %{
20727   predicate(UseAVX == 0);
20728   match(Set dst (SubVF dst src));
20729   format %{ "subps   $dst,$src\t! sub packedF" %}
20730   ins_encode %{
20731     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20732   %}
20733   ins_pipe( pipe_slow );
20734 %}
20735 
20736 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20737   predicate(UseAVX > 0);
20738   match(Set dst (SubVF src1 src2));
20739   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20740   ins_encode %{
20741     int vlen_enc = vector_length_encoding(this);
20742     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20743   %}
20744   ins_pipe( pipe_slow );
20745 %}
20746 
20747 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20748   predicate((UseAVX > 0) &&
20749             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20750   match(Set dst (SubVF src (LoadVector mem)));
20751   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20752   ins_encode %{
20753     int vlen_enc = vector_length_encoding(this);
20754     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20755   %}
20756   ins_pipe( pipe_slow );
20757 %}
20758 
20759 // Doubles vector sub
20760 instruct vsubD(vec dst, vec src) %{
20761   predicate(UseAVX == 0);
20762   match(Set dst (SubVD dst src));
20763   format %{ "subpd   $dst,$src\t! sub packedD" %}
20764   ins_encode %{
20765     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20766   %}
20767   ins_pipe( pipe_slow );
20768 %}
20769 
20770 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20771   predicate(UseAVX > 0);
20772   match(Set dst (SubVD src1 src2));
20773   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20774   ins_encode %{
20775     int vlen_enc = vector_length_encoding(this);
20776     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20777   %}
20778   ins_pipe( pipe_slow );
20779 %}
20780 
20781 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20782   predicate((UseAVX > 0) &&
20783             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20784   match(Set dst (SubVD src (LoadVector mem)));
20785   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20786   ins_encode %{
20787     int vlen_enc = vector_length_encoding(this);
20788     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20789   %}
20790   ins_pipe( pipe_slow );
20791 %}
20792 
20793 // --------------------------------- MUL --------------------------------------
20794 
20795 // Byte vector mul
20796 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20797   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20798   match(Set dst (MulVB src1 src2));
20799   effect(TEMP dst, TEMP xtmp);
20800   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20801   ins_encode %{
20802     assert(UseSSE > 3, "required");
20803     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20804     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20805     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20806     __ psllw($dst$$XMMRegister, 8);
20807     __ psrlw($dst$$XMMRegister, 8);
20808     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20809   %}
20810   ins_pipe( pipe_slow );
20811 %}
20812 
20813 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20814   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20815   match(Set dst (MulVB src1 src2));
20816   effect(TEMP dst, TEMP xtmp);
20817   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20818   ins_encode %{
20819     assert(UseSSE > 3, "required");
20820     // Odd-index elements
20821     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20822     __ psrlw($dst$$XMMRegister, 8);
20823     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20824     __ psrlw($xtmp$$XMMRegister, 8);
20825     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20826     __ psllw($dst$$XMMRegister, 8);
20827     // Even-index elements
20828     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20829     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20830     __ psllw($xtmp$$XMMRegister, 8);
20831     __ psrlw($xtmp$$XMMRegister, 8);
20832     // Combine
20833     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20834   %}
20835   ins_pipe( pipe_slow );
20836 %}
20837 
20838 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20839   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20840   match(Set dst (MulVB src1 src2));
20841   effect(TEMP xtmp1, TEMP xtmp2);
20842   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20843   ins_encode %{
20844     int vlen_enc = vector_length_encoding(this);
20845     // Odd-index elements
20846     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20847     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20848     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20849     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20850     // Even-index elements
20851     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20852     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20853     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20854     // Combine
20855     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20856   %}
20857   ins_pipe( pipe_slow );
20858 %}
20859 
20860 // Shorts/Chars vector mul
20861 instruct vmulS(vec dst, vec src) %{
20862   predicate(UseAVX == 0);
20863   match(Set dst (MulVS dst src));
20864   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20865   ins_encode %{
20866     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20867   %}
20868   ins_pipe( pipe_slow );
20869 %}
20870 
// AVX three-operand register form of MulVS: dst = src1 * src2 (packed shorts).
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    // Vector-length encoding is taken from this node.
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
20881 
// AVX form of MulVS with the second operand folded in from memory
// (LoadVector mem). Only matched when input 1 of the node is wider than
// 8 bytes.
// NOTE(review): presumably because the memory-operand encoding reads at least
// a full 16-byte operand — confirm.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
20893 
20894 // Integers vector mul
// SSE (UseAVX == 0) two-operand form of MulVI: dst is both the first
// multiplicand and the destination.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld  $dst,$src\t! mul packedI" %}
  ins_encode %{
    // pmulld needs more than SSE3; the predicate does not check this, so the
    // requirement is only asserted here.
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20905 
// AVX three-operand register form of MulVI: dst = src1 * src2 (packed ints).
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    // Vector-length encoding is taken from this node.
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
20916 
// AVX form of MulVI with the second operand folded in from memory
// (LoadVector mem). Only matched when input 1 of the node is wider than
// 8 bytes.
// NOTE(review): presumably because the memory-operand encoding reads at least
// a full 16-byte operand — confirm.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
20928 
20929 // Longs vector mul
// EVEX register form of MulVL using a single evpmullq.
// Matched for 64-byte vectors when supports_avx512dq() holds, or for any
// vector size when supports_avx512vldq() holds.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
20944 
// EVEX form of MulVL with the second operand folded in from memory
// (LoadVector mem). Matched for 64-byte vectors when supports_avx512dq()
// holds, or for vectors wider than 8 bytes when supports_avx512vldq() holds.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
                vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
20960 
// SSE (UseAVX == 0) form of MulVL, synthesised from 32-bit multiplies.
// For each 64-bit lane with halves (hi, lo):
//   result = lo1*lo2 + ((lo1*hi2 + hi1*lo2) << 32)
// dst is declared TEMP because it is written before src1/src2 are last read,
// so it must not share a register with either input.
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Cross (lo*hi) products; only their lower 32 bits matter.
    // 0xB1 swaps the two 32-bit halves of every 64-bit lane.
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    // Add the two cross products of each lane and move the sum into the
    // upper 32 bits of the lane.
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Full 64-bit lo*lo products, added to the shifted cross-product sum.
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20982 
// AVX form of MulVL for targets where the single-instruction EVEX rule does
// not apply: 64-byte vectors without AVX512DQ, or narrower vectors without
// supports_avx512vldq(). For each 64-bit lane with halves (hi, lo):
//   result = lo1*lo2 + ((lo1*hi2 + hi1*lo2) << 32)
// dst is written only by the final instruction; both temporaries are clobbered.
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);

    // Cross (lo*hi) products; only their lower 32 bits matter.
    // 0xB1 swaps the two 32-bit halves of every 64-bit lane.
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vector_len);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vector_len);

    // Sum the two cross products per lane and place the sum in the upper
    // 32 bits of the lane.
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vector_len);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vector_len);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vector_len);

    // Full 64-bit lo*lo products, then the final sum.
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21007 
// Cheap (ins_cost 100) MulVL rule, matched when the MulVL node reports
// has_uint_inputs(); a single vpmuludq then produces the 64-bit product.
// NOTE(review): presumably has_uint_inputs() means both inputs are
// zero-extended 32-bit values — confirm against MulVLNode.
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21019 
// Cheap (ins_cost 100) MulVL rule, matched when the MulVL node reports
// has_int_inputs(); a single vpmuldq then produces the 64-bit product.
// NOTE(review): presumably has_int_inputs() means both inputs are
// sign-extended 32-bit values — confirm against MulVLNode.
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21031 
21032 // Floats vector mul
// SSE (UseAVX == 0) two-operand form of MulVF: dst is both the first
// multiplicand and the destination.
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps   $dst,$src\t! mul packedF" %}
  ins_encode %{
    // dst = dst * src, element-wise on packed floats.
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21042 
// AVX three-operand register form of MulVF: dst = src1 * src2 (packed floats).
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    // Vector-length encoding is taken from this node.
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21053 
// AVX form of MulVF with the second operand folded in from memory
// (LoadVector mem). Only matched when input 1 of the node is wider than
// 8 bytes.
// NOTE(review): presumably because the memory-operand encoding reads at least
// a full 16-byte operand — confirm.
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21065 
21066 // Doubles vector mul
// SSE (UseAVX == 0) two-operand form of MulVD: dst is both the first
// multiplicand and the destination.
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd   $dst,$src\t! mul packedD" %}
  ins_encode %{
    // dst = dst * src, element-wise on packed doubles.
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21076 
// AVX three-operand register form of MulVD: dst = src1 * src2 (packed doubles).
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    // Vector-length encoding is taken from this node.
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21087 
// AVX form of MulVD with the second operand folded in from memory
// (LoadVector mem). Only matched when input 1 of the node is wider than
// 8 bytes.
// NOTE(review): presumably because the memory-operand encoding reads at least
// a full 16-byte operand — confirm.
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21099 
21100 // --------------------------------- DIV --------------------------------------
21101 
21102 // Floats vector div
// SSE (UseAVX == 0) two-operand form of DivVF: dst is both the dividend and
// the destination.
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps   $dst,$src\t! div packedF" %}
  ins_encode %{
    // dst = dst / src, element-wise on packed floats.
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21112 
// AVX three-operand register form of DivVF: dst = src1 / src2 (packed floats).
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    // Vector-length encoding is taken from this node.
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21123 
// AVX form of DivVF with the divisor folded in from memory (LoadVector mem).
// Only matched when input 1 of the node is wider than 8 bytes.
// NOTE(review): presumably because the memory-operand encoding reads at least
// a full 16-byte operand — confirm.
instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21135 
21136 // Doubles vector div
// SSE (UseAVX == 0) two-operand form of DivVD: dst is both the dividend and
// the destination.
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd   $dst,$src\t! div packedD" %}
  ins_encode %{
    // dst = dst / src, element-wise on packed doubles.
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21146 
// AVX three-operand register form of DivVD: dst = src1 / src2 (packed doubles).
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    // Vector-length encoding is taken from this node.
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21157 
// AVX form of DivVD with the divisor folded in from memory (LoadVector mem).
// Only matched when input 1 of the node is wider than 8 bytes.
// NOTE(review): presumably because the memory-operand encoding reads at least
// a full 16-byte operand — confirm.
instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21169 
21170 // ------------------------------ MinMax ---------------------------------------
21171 
21172 // Byte, Short, Int vector Min/Max
// SSE (UseAVX == 0) two-operand MinV/MaxV for integral element types other
// than long. One rule serves both operations; the ideal opcode passed to
// pminmax tells them apart.
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax  $dst,$src\t!  " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const BasicType bt = Matcher::vector_element_basic_type(this);
    __ pminmax(this->ideal_Opcode(), bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21188 
// AVX three-operand MinV/MaxV for integral element types other than long.
// One rule serves both operations; the ideal opcode passed to vpminmax tells
// them apart.
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vector_len = vector_length_encoding(this);

    __ vpminmax(ideal_op, bt,
                $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21204 
21205 // Long vector Min/Max
// SSE (UseAVX == 0) MinV/MaxV for 16-byte long vectors. dst is both an input
// and the result; tmp uses the rxmm0 operand class.
// NOTE(review): rxmm0 presumably pins tmp to XMM0 for an instruction with an
// implicit XMM0 operand inside pminmax — confirm.
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(bt == T_LONG, "sanity");

    __ pminmax(this->ideal_Opcode(), bt,
               $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21224 
// AVX MinV/MaxV for long vectors of at most 32 bytes on targets without
// AVX512VL. Operands use the legVec class and dst is a TEMP, so it is not
// allocated on top of either source.
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vector_len = vector_length_encoding(this);
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(bt == T_LONG, "sanity");

    __ vpminmax(ideal_op, bt,
                $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21242 
// EVEX MinV/MaxV for long vectors: matched for 64-byte vectors, or for any
// vector size when AVX512VL is available. One rule serves both operations;
// the ideal opcode passed to vpminmax tells them apart.
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21261 
21262 // Float/Double vector Min/Max
// Float/double MinV/MaxV for targets reporting supports_avx10_2(). No
// temporaries are needed; k0 is passed as the mask register operand of the
// helper.
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP  $dst, $a, $b" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vector_len = vector_length_encoding(this);
    __ vminmax_fp_avx10_2(ideal_op, bt,
                          $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister,
                          vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21277 
21278 // Float/Double vector Min/Max
// Float/double MinV/MaxV for AVX targets without AVX10.2, vectors of at most
// 32 bytes. The work is delegated to vminmax_fp, which is given three
// temporary vector registers (tmp, atmp, btmp).
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            UseAVX > 0);
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vector_len = vector_length_encoding(this);
    const int ideal_op = this->ideal_Opcode();

    __ vminmax_fp(ideal_op, bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                  vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21300 
// Float/double MinV/MaxV for 64-byte vectors on targets without AVX10.2.
// The work is delegated to evminmax_fp, which is given two temporary vector
// registers (atmp, btmp) and one temporary opmask register (ktmp); dst is
// also a TEMP, so it is not allocated on top of a or b.
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21321 
21322 // ------------------------------ Unsigned vector Min/Max ----------------------
21323 
// Unsigned vector min/max (UMinV/UMaxV), register form. Handles every element
// type when AVX512VL is available, and every type except long otherwise.
instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "");
    const int ideal_op = this->ideal_Opcode();
    const int vector_len = vector_length_encoding(this);
    __ vpuminmax(ideal_op, bt,
                 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                 vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21338 
// Unsigned vector min/max (UMinV/UMaxV) with the second operand folded in
// from memory (LoadVector b). Same applicability as the register form: every
// element type with AVX512VL, every type except long otherwise.
instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "");
    const int ideal_op = this->ideal_Opcode();
    const int vector_len = vector_length_encoding(this);
    __ vpuminmax(ideal_op, bt,
                 $dst$$XMMRegister, $a$$XMMRegister, $b$$Address,
                 vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21353 
// Unsigned vector min/max (UMinV/UMaxV) for long elements on targets without
// AVX512VL. The work is delegated to vpuminmaxq, which is given two temporary
// vector registers.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21367 
// Masked unsigned vector min/max, register form. dst is both the first input
// and the result; mask is an opmask register (kReg). This rule carries no
// predicate of its own.
instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vector_len = vector_length_encoding(this);
    // NOTE(review): the boolean argument is presumably the merge-masking
    // flag of evmasked_op — confirm against its declaration.
    __ evmasked_op(ideal_op, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21381 
// Masked unsigned vector min/max with the second operand folded in from
// memory (LoadVector src2). dst is both the first input and the result; mask
// is an opmask register (kReg). This rule carries no predicate of its own.
instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vector_len = vector_length_encoding(this);
    // NOTE(review): the boolean argument is presumably the merge-masking
    // flag of evmasked_op — confirm against its declaration.
    __ evmasked_op(ideal_op, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21395 
21396 // --------------------------------- Signum/CopySign ---------------------------
21397 
// Scalar float signum. dst is both the input and the result; the zero and one
// operands are supplied by the ideal graph as registers. The flags register
// is declared killed.
instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    // The ideal opcode tells signum_fp which of the float/double rules it
    // is being emitted for.
    __ signum_fp(this->ideal_Opcode(),
                 $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21408 
// Scalar double signum. dst is both the input and the result; the zero and
// one operands are supplied by the ideal graph as registers. The flags
// register is declared killed.
instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    // The ideal opcode tells signum_fp which of the float/double rules it
    // is being emitted for.
    __ signum_fp(this->ideal_Opcode(),
                 $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21419 
// Vector signum (SignumVF/SignumVD) for targets without AVX512VL, vectors of
// at most 32 bytes. Uses one temporary vector register; dst is also a TEMP,
// so it is not allocated on top of any input.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vlen_enc = vector_length_encoding(this);
    __ vector_signum_avx(ideal_op,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21434 
// Vector signum (SignumVF/SignumVD) for targets with AVX512VL, or for 64-byte
// vectors. Uses one temporary opmask register; dst is also a TEMP, so it is
// not allocated on top of any input.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vlen_enc = vector_length_encoding(this);
    __ vector_signum_evex(ideal_op,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21449 
21450 // ---------------------------------------
// For copySign use 0xE4 as the imm8 truth table for vpternlog
21452 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21453 // C (xmm2) is set to 0x7FFFFFFF
21454 // Wherever xmm2 is 0, we want to pick from B (sign)
21455 // Wherever xmm2 is 1, we want to pick from A (src)
21456 //
21457 // A B C Result
21458 // 0 0 0 0
21459 // 0 0 1 0
21460 // 0 1 0 1
21461 // 0 1 1 0
21462 // 1 0 0 0
21463 // 1 0 1 1
21464 // 1 1 0 1
21465 // 1 1 1 1
21466 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
21468 // ---------------------------------------
21469 
// Scalar float copySign: dst keeps its magnitude bits and takes its sign bit
// from src. tmp2 (a GPR) is used only to build the mask that is then moved
// into tmp1.
// NOTE(review): this rule has no predicate although vpternlogd is an AVX-512
// instruction; applicability is presumably gated elsewhere — confirm.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // tmp1 = 0x7FFFFFFF: every bit except the float sign bit.
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    // Per the 0xE4 truth table documented above this rule: where the mask bit
    // is 1 keep the dst bit, where it is 0 take the src bit.
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21481 
// Scalar double copySign: dst keeps its magnitude bits and takes its sign bit
// from src. The immD operand `zero` is part of the matched tree but is not
// used by the encoding. tmp2 (a GPR) is used only to build the mask that is
// then moved into tmp1.
// NOTE(review): this rule has no predicate although vpternlogq is an AVX-512
// instruction; applicability is presumably gated elsewhere — confirm.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // tmp1 = 0x7FFFFFFFFFFFFFFF: every bit except the double sign bit.
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    // Per the 0xE4 truth table documented above the copySign rules: where the
    // mask bit is 1 keep the dst bit, where it is 0 take the src bit.
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21494 
21495 //----------------------------- CompressBits/ExpandBits ------------------------
21496 
// 32-bit CompressBits (node type is int), register mask: emitted as pextl.
// NOTE(review): the predicate checks only the node type; availability of the
// pext instruction is presumably verified elsewhere — confirm.
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21506 
// 32-bit ExpandBits (node type is int), register mask: emitted as pdepl.
// NOTE(review): the predicate checks only the node type; availability of the
// pdep instruction is presumably verified elsewhere — confirm.
instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21516 
// 32-bit CompressBits (node type is int) with the mask folded in from memory
// (LoadI mask): emitted as pextl with a memory operand.
instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21526 
// 32-bit ExpandBits (node type is int) with the mask folded in from memory
// (LoadI mask): emitted as pdepl with a memory operand.
instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21536 
21537 // --------------------------------- Sqrt --------------------------------------
21538 
// Packed float square root (SqrtVF), register form. The rule has no
// predicate; the AVX requirement is only asserted at emission time.
instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21549 
// Packed float square root (SqrtVF) with the operand folded in from memory
// (LoadVector mem). Only matched when input 1 of the node is wider than
// 8 bytes; the AVX requirement is only asserted at emission time.
instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    __ vsqrtps($dst$$XMMRegister, $mem$$Address,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21561 
21562 // Floating point vector sqrt
// Packed double square root (SqrtVD), register form. The rule has no
// predicate; the AVX requirement is only asserted at emission time.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21573 
// Packed double square root (SqrtVD) with the operand folded in from memory
// (LoadVector mem). Only matched when input 1 of the node is wider than
// 8 bytes; the AVX requirement is only asserted at emission time.
instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21585 
21586 // ------------------------------ Shift ---------------------------------------
21587 
21588 // Left and right shift count vectors are the same on x86
21589 // (only lowest bits of xmm reg are used for count).
// Materialises a vector shift count from a general-purpose register. One rule
// serves both LShiftCntV and RShiftCntV because the encoding is identical.
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl    $dst,$cnt\t! load shift count" %}
  ins_encode %{
    // Move the 32-bit count into the low lane of the XMM register.
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21599 
21600 // Byte vector shift
// Byte vector shift by a uniform (non-variable) count for vectors of at most
// 8 elements. The bytes are widened to words, shifted as words, masked and
// packed back to bytes. One rule serves left, arithmetic-right and
// logical-right shifts; the ideal opcode tells them apart.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int shift_op = this->ideal_Opcode();
    // Every shift kind except the logical right shift widens with sign
    // extension.
    const bool sign_extend = (shift_op != Op_URShiftVB);

    // Widen bytes to words and shift them.
    __ vextendbw(sign_extend, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(shift_op, $tmp$$XMMRegister, $shift$$XMMRegister);

    // Apply the short-to-byte mask, then pack the words back to bytes.
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21620 
// Byte vector shift by a uniform (non-variable) count for 16-element vectors
// when UseAVX <= 1. The lower and upper 8 bytes are widened to words and
// shifted separately, then masked and packed back into one byte vector.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int shift_op = this->ideal_Opcode();
    // Every shift kind except the logical right shift widens with sign
    // extension.
    const bool sign_extend = (shift_op != Op_URShiftVB);

    // Lower 8 bytes: widen to words and shift.
    __ vextendbw(sign_extend, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(shift_op, $tmp1$$XMMRegister, $shift$$XMMRegister);

    // Upper 8 bytes: 0xE moves the upper 64 bits of src into the lower
    // 64 bits of tmp2; then widen and shift the same way.
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(sign_extend, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(shift_op, $tmp2$$XMMRegister, $shift$$XMMRegister);

    // Apply the short-to-byte mask to both halves, then pack them into dst
    // (lower half from dst, upper half from tmp2).
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21645 
// Uniform-count shift of a 16-lane byte vector when UseAVX > 1.
// All 16 bytes are widened into one 256-bit register, shifted and masked
// there, and the two 128-bit halves are packed back into dst.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    const int shift_op = this->ideal_Opcode();
    const bool is_signed = (shift_op != Op_URShiftVB);
    const int wide_enc = Assembler::AVX_256bit;
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister words  = $tmp$$XMMRegister;

    __ vextendbw(is_signed, words, $src$$XMMRegister, wide_enc);
    __ vshiftw(shift_op, words, words, $shift$$XMMRegister, wide_enc);
    __ vpand(words, words, ExternalAddress(vector_short_to_byte_mask()), wide_enc, noreg);

    // Pack the low half of the words with the extracted high half.
    __ vextracti128_high(result, words);
    __ vpackuswb(result, words, result, 0);
  %}
  ins_pipe( pipe_slow );
%}
21666 
// Uniform-count shift of a 32-lane byte vector.
// The upper 16 bytes are processed in tmp and the lower 16 bytes in dst,
// each widened to a full 256-bit register, then packed and permuted.
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    const int shift_op = this->ideal_Opcode();
    const bool is_signed = (shift_op != Op_URShiftVB);
    const int enc = Assembler::AVX_256bit;
    XMMRegister lo_words = $dst$$XMMRegister;
    XMMRegister hi_words = $tmp$$XMMRegister;

    // Widen the upper 128 bits into hi_words and the lower 128 bits into lo_words.
    __ vextracti128_high(hi_words, $src$$XMMRegister);
    __ vextendbw(is_signed, hi_words, hi_words, enc);
    __ vextendbw(is_signed, lo_words, $src$$XMMRegister, enc);

    // Shift both word vectors by the same count.
    __ vshiftw(shift_op, hi_words, hi_words, $shift$$XMMRegister, enc);
    __ vshiftw(shift_op, lo_words, lo_words, $shift$$XMMRegister, enc);

    // Mask each word vector before packing.
    __ vpand(hi_words, hi_words, ExternalAddress(vector_short_to_byte_mask()), enc, noreg);
    __ vpand(lo_words, lo_words, ExternalAddress(vector_short_to_byte_mask()), enc, noreg);

    // The pack interleaves per 128-bit lane, so vpermq with 0xD8 reorders the quadwords afterwards.
    __ vpackuswb(lo_words, lo_words, hi_words, enc);
    __ vpermq(lo_words, lo_words, 0xD8, enc);
  %}
  ins_pipe( pipe_slow );
%}
21691 
// Uniform-count shift of a 64-lane byte vector.
// The upper 32 bytes are processed in tmp1 and the lower 32 bytes in tmp2;
// dst first holds the broadcast mask and finally the packed, permuted result.
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst  (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int shift_op = this->ideal_Opcode();
    const bool is_signed = (shift_op != Op_URShiftVB);
    const int enc = Assembler::AVX_512bit;
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister hi_words = $tmp1$$XMMRegister;
    XMMRegister lo_words = $tmp2$$XMMRegister;

    // Widen the upper 256 bits into hi_words and the lower 256 bits into lo_words.
    __ vextracti64x4(hi_words, $src$$XMMRegister, 1);
    __ vextendbw(is_signed, hi_words, hi_words, enc);
    __ vextendbw(is_signed, lo_words, $src$$XMMRegister, enc);

    // Shift both word vectors by the same count.
    __ vshiftw(shift_op, hi_words, hi_words, $shift$$XMMRegister, enc);
    __ vshiftw(shift_op, lo_words, lo_words, $shift$$XMMRegister, enc);

    // Load the mask, broadcast it across the register and apply it.
    __ vmovdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd(result, result, enc);
    __ vpand(hi_words, hi_words, result, enc);
    __ vpand(lo_words, lo_words, result, enc);

    // Pack to bytes, then reorder with the permutation loaded from vector_byte_perm_mask.
    __ vpackuswb(result, hi_words, lo_words, enc);
    __ evmovdquq(lo_words, ExternalAddress(vector_byte_perm_mask()), enc, noreg);
    __ vpermq(result, lo_words, result, enc);
  %}
  ins_pipe( pipe_slow );
%}
21719 
// A logical right shift on a short vector produces a result that differs
// from Java semantics for negative values, because Java code converts a
// short value to int with sign extension before shifting. Char vectors are
// fine since chars are unsigned values.
// Shorts/Chars vector shift (left, arithmetic right and logical right)
// Uniform-count shift of a short/char vector.
// With AVX the three-operand vshiftw form is used. Otherwise src is copied
// into dst with a move sized to the lane count and dst is shifted in place.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    const int shift_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    if (UseAVX > 0) {
      const int enc = vector_length_encoding(this);
      __ vshiftw(shift_op, result, input, count, enc);
    } else {
      int vlen = Matcher::vector_length(this);
      // Select the copy by lane count: 2, 4 or 8 shorts.
      switch (vlen) {
        case 2:
          __ movflt(result, input);
          break;
        case 4:
          __ movdbl(result, input);
          break;
        default:
          assert (vlen == 8, "sanity");
          __ movdqu(result, input);
          break;
      }
      // The destructive shift is the same for every lane count.
      __ vshiftw(shift_op, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21754 
// Integers vector shift (left, arithmetic right and logical right)
// Uniform-count shift of an int vector, with the count held in a vector register.
// With AVX the three-operand vshiftd form is used. Otherwise src is copied
// into dst and dst is shifted in place.
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int shift_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    if (UseAVX > 0) {
      const int enc = vector_length_encoding(this);
      __ vshiftd(shift_op, result, input, count, enc);
    } else {
      int vlen = Matcher::vector_length(this);
      // Two ints need a 64-bit copy, four ints the full 128 bits.
      if (vlen == 2) {
        __ movdbl(result, input);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu(result, input);
      }
      __ vshiftd(shift_op, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21782 
// Integers vector constant shift (left, arithmetic right and logical right)
// Shift of an int vector by an 8-bit immediate count.
// The rule matches the shift node together with its shift-count node, so
// the count is taken from the immediate operand.
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int shift_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (UseAVX > 0) {
      const int enc = vector_length_encoding(this);
      __ vshiftd_imm(shift_op, result, input, $shift$$constant, enc);
    } else {
      int vlen = Matcher::vector_length(this);
      // Two ints need a 64-bit copy, four ints the full 128 bits.
      if (vlen == 2) {
        __ movdbl(result, input);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu(result, input);
      }
      __ vshiftd_imm(shift_op, result, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21808 
21809 // Longs vector shift
// Uniform-count left and logical-right shift of a long vector.
// The arithmetic right shift (RShiftVL) is not matched by this rule.
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int shift_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    if (UseAVX > 0) {
      const int enc = vector_length_encoding(this);
      __ vshiftq(shift_op, result, $src$$XMMRegister, count, enc);
    } else {
      // Without AVX only the two-lane vector is expected; copy, then shift in place.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, $src$$XMMRegister);
      __ vshiftq(shift_op, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21829 
21830 // Longs vector constant shift
// Left and logical-right shift of a long vector by an 8-bit immediate count.
// The rule matches the shift node together with its shift-count node.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int shift_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    if (UseAVX > 0) {
      const int enc = vector_length_encoding(this);
      __ vshiftq_imm(shift_op, result, $src$$XMMRegister, $shift$$constant, enc);
    } else {
      // Without AVX only the two-lane vector is expected; copy, then shift in place.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, $src$$XMMRegister);
      __ vshiftq_imm(shift_op, result, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21848 
21849 // -------------------ArithmeticRightShift -----------------------------------
21850 // Long vector arithmetic right shift
// Uniform-count arithmetic right shift of a long vector when UseAVX <= 2.
// The shift is assembled from a logical shift of the value, a logical shift
// of the constant at vector_long_sign_mask, an xor and a subtract.
// NOTE(review): presumably the constant has only the sign bit set in each
// lane, giving ((x >>> s) ^ m) - m with m = sign >>> s -- confirm.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister mask   = $tmp$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    if (vlen != 2) {
      // Four lanes: three-operand 256-bit forms.
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      const int enc = Assembler::AVX_256bit;
      __ vpsrlq(result, $src$$XMMRegister, count, enc);
      __ vmovdqu(mask, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq(mask, mask, count, enc);
      __ vpxor(result, result, mask, enc);
      __ vpsubq(result, result, mask, enc);
    } else {
      // Two lanes: destructive 128-bit forms on a copy of src.
      __ movdqu(result, $src$$XMMRegister);
      __ psrlq(result, count);
      __ movdqu(mask, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq(mask, count);
      __ pxor(result, mask);
      __ psubq(result, mask);
    }
  %}
  ins_pipe( pipe_slow );
%}
21878 
// Uniform-count arithmetic right shift of a long vector when UseAVX > 2,
// emitted as a single evpsraq.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister count = $shift$$XMMRegister;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, count, enc);
  %}
  ins_pipe( pipe_slow );
%}
21889 
21890 // ------------------- Variable Shift -----------------------------
21891 // Byte variable shift
// Per-lane (variable) shift of a byte vector with at most 8 lanes when
// AVX512BW is not available. varshiftbw leaves a word result in dst, which
// is then packed to bytes.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int shift_op = this->ideal_Opcode();
    const int enc = Assembler::AVX_128bit;
    XMMRegister result = $dst$$XMMRegister;
    // Shift with the helper, then down convert the words to bytes.
    __ varshiftbw(shift_op, result, $src$$XMMRegister, $shift$$XMMRegister, enc, $vtmp$$XMMRegister);
    __ vpackuswb(result, result, result, 0);
  %}
  ins_pipe( pipe_slow );
%}
21911 
// Per-lane (variable) shift of a 16-lane byte vector when AVX512BW is not
// available. Each 8-byte half is shifted to a word result and the two word
// results are packed into dst.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int shift_op = this->ideal_Opcode();
    const int enc = Assembler::AVX_128bit;
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister hi_data  = $vtmp1$$XMMRegister;
    XMMRegister hi_count = $vtmp2$$XMMRegister;

    // Lower half: word result goes to the destination, hi_data is scratch here.
    __ varshiftbw(shift_op, result, $src$$XMMRegister, $shift$$XMMRegister, enc, hi_data);

    // Upper half: bring the high 64 bits of data and counts down, then
    // shift with the word result landing in hi_data.
    __ vpshufd(hi_data, $src$$XMMRegister, 0xE, 0);
    __ vpshufd(hi_count, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(shift_op, hi_data, hi_data, hi_count, enc, hi_count);

    // Merge and down convert both word results to bytes.
    __ vpackuswb(result, result, hi_data, 0);
  %}
  ins_pipe( pipe_slow );
%}
21939 
// Per-lane (variable) shift of a 32-lane byte vector when AVX512BW is not
// available. Each 128-bit half is handled as two 8-byte pieces; the lower
// half result is built in dst, the upper half result in vtmp1, and the two
// are joined with vinserti128.
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int shift_op = this->ideal_Opcode();
    const int enc = Assembler::AVX_128bit;
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister data   = $vtmp1$$XMMRegister;
    XMMRegister counts = $vtmp2$$XMMRegister;
    XMMRegister words  = $vtmp3$$XMMRegister;

    // Lower 128 bits: shift both 8-byte pieces and pack them into the result.
    __ varshiftbw(shift_op, result, $src$$XMMRegister, $shift$$XMMRegister, enc, data);
    __ vpshufd(data, $src$$XMMRegister, 0xE, 0);
    __ vpshufd(counts, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(shift_op, data, data, counts, enc, counts);
    __ vpackuswb(result, result, data, 0);

    // Upper 128 bits: extract data and counts, shift both 8-byte pieces
    // and pack them into the data register.
    __ vextracti128_high(data, $src$$XMMRegister);
    __ vextracti128_high(counts, $shift$$XMMRegister);
    __ varshiftbw(shift_op, words, data, counts, enc, $vtmp4$$XMMRegister);
    __ vpshufd(data, data, 0xE, 0);
    __ vpshufd(counts, counts, 0xE, 0);
    __ varshiftbw(shift_op, data, data, counts, enc, counts);
    __ vpackuswb(data, words, data, 0);

    // Place the upper result into the high 128 bits of the destination.
    __ vinserti128(result, result, data, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21975 
// Per-lane (variable) shift of a byte vector with at most 32 lanes when
// AVX512BW is available; the whole operation is delegated to evarshiftb.
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int shift_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ evarshiftb(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
21994 
// Per-lane (variable) shift of a 64-lane byte vector when AVX512BW is
// available. Each 256-bit half is shifted with evarshiftb and the upper
// result is inserted back into dst.
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int shift_op = this->ideal_Opcode();
    const int half_enc = Assembler::AVX_256bit;
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister hi_data  = $vtmp1$$XMMRegister;
    XMMRegister hi_count = $vtmp2$$XMMRegister;

    // Lower half straight into the destination, hi_data is scratch here.
    __ evarshiftb(shift_op, result, $src$$XMMRegister, $shift$$XMMRegister, half_enc, hi_data);

    // Upper half: extract data and counts, then shift into hi_data.
    __ vextracti64x4_high(hi_data, $src$$XMMRegister);
    __ vextracti64x4_high(hi_count, $shift$$XMMRegister);
    __ evarshiftb(shift_op, hi_data, hi_data, hi_count, half_enc, hi_count);

    // Insert the upper result into the high 256 bits of the destination.
    __ vinserti64x4(result, result, hi_data, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
22017 
22018 // Short variable shift
// Per-lane (variable) shift of a short vector with at most 8 lanes when
// AVX512BW is not available. Data and counts are widened to ints, shifted
// with varshiftd, masked with vector_int_to_short_mask and packed back to
// shorts. The rule covers left, arithmetic-right and logical-right shifts.
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // Every opcode except the unsigned right shift requests the signed extension.
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Widen the data to ints; the counts are always zero extended.
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
    // Shift as ints and mask before packing.
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    // Pack the low and high 128-bit halves back into shorts.
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
22043 
// Per-lane (variable) shift of a 16-lane short vector when AVX512BW is not
// available. Each 128-bit half is widened to ints, shifted with varshiftd
// and masked; the two halves are then packed and permuted into dst.
// The rule covers left, arithmetic-right and logical-right shifts.
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // Every opcode except the unsigned right shift requests the signed extension.
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst. The pack works per
    // 128-bit lane, so vpermq with 0xD8 reorders the quadwords afterwards.
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22079 
// Per-lane (variable) shift of a short vector when AVX512BW is available.
// The predicate places no restriction on the lane count. Without AVX512VL
// the 512-bit encoding is used whatever the vector length.
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int shift_op = this->ideal_Opcode();
    // Natural encoding for this vector, widened to 512 bits when VL is missing.
    const int natural_enc = vector_length_encoding(this);
    const int enc = VM_Version::supports_avx512vl() ? natural_enc : Assembler::AVX_512bit;
    __ varshiftw(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22099 
22100 //Integer variable shift
// Per-lane (variable) shift of an int vector, delegated to varshiftd.
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int shift_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    XMMRegister counts = $shift$$XMMRegister;
    __ varshiftd(shift_op, $dst$$XMMRegister, $src$$XMMRegister, counts, enc);
  %}
  ins_pipe( pipe_slow );
%}
22116 
22117 //Long variable shift
// Per-lane (variable) left and logical-right shift of a long vector,
// delegated to varshiftq. The arithmetic right shift is not matched here.
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int shift_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    XMMRegister counts = $shift$$XMMRegister;
    __ varshiftq(shift_op, $dst$$XMMRegister, $src$$XMMRegister, counts, enc);
  %}
  ins_pipe( pipe_slow );
%}
22132 
22133 //Long variable right shift arithmetic
// Per-lane (variable) arithmetic right shift of a long vector with at most
// 4 lanes when UseAVX == 2. This uses the varshiftq overload that takes an
// extra temporary register.
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    const int shift_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ varshiftq(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
22149 
// Per-lane (variable) arithmetic right shift of a long vector when
// UseAVX > 2, delegated to varshiftq without a temporary register.
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22162 
22163 // --------------------------------- AND --------------------------------------
22164 
// Bitwise AND of two vectors when UseAVX == 0.
// Two-operand form: dst is both the first input and the result.
instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ pand(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22174 
// Bitwise AND of two vector registers when UseAVX > 0 (three-operand form).
instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    // The encoding is derived from the length of the result vector.
    const int enc = vector_length_encoding(this);
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpand($dst$$XMMRegister, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
22185 
// Bitwise AND of a vector register with a vector loaded from memory.
// Selected only with AVX and when the first input is wider than 8 bytes.
instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    // The encoding is derived from the length of the result vector.
    const int enc = vector_length_encoding(this);
    XMMRegister lhs = $src$$XMMRegister;
    __ vpand($dst$$XMMRegister, lhs, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
22197 
22198 // --------------------------------- OR ---------------------------------------
22199 
// Bitwise OR of two vectors when UseAVX == 0.
// Two-operand form: dst is both the first input and the result.
instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ por(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22209 
// Bitwise OR of two vector registers when UseAVX > 0 (three-operand form).
instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    // The encoding is derived from the length of the result vector.
    const int enc = vector_length_encoding(this);
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpor($dst$$XMMRegister, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
22220 
// Bitwise OR of a vector register with a vector loaded from memory.
// Selected only with AVX and when the first input is wider than 8 bytes.
instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    // The encoding is derived from the length of the result vector.
    const int enc = vector_length_encoding(this);
    XMMRegister lhs = $src$$XMMRegister;
    __ vpor($dst$$XMMRegister, lhs, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
22232 
22233 // --------------------------------- XOR --------------------------------------
22234 
// Bitwise XOR of two vectors when UseAVX == 0.
// Two-operand form: dst is both the first input and the result.
instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ pxor(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22244 
// Bitwise XOR of two vector registers when UseAVX > 0 (three-operand form).
instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    // The encoding is derived from the length of the result vector.
    const int enc = vector_length_encoding(this);
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vpxor($dst$$XMMRegister, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
22255 
// Bitwise XOR of a vector register with a vector loaded from memory.
// Selected only with AVX and when the first input is wider than 8 bytes.
instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    // The encoding is derived from the length of the result vector.
    const int enc = vector_length_encoding(this);
    XMMRegister lhs = $src$$XMMRegister;
    __ vpxor($dst$$XMMRegister, lhs, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
22267 
22268 // --------------------------------- VectorCast --------------------------------------
22269 
// Casts a byte vector to another element type via vconvert_b2x.
// Selected when AVX512VL is available or the result element type is not
// double; the remaining case is covered by vcastBtoD.
instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    // Element type and encoding both come from the result vector.
    const int enc = vector_length_encoding(this);
    const BasicType result_bt = Matcher::vector_element_basic_type(this);
    __ vconvert_b2x(result_bt, $dst$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22281 
// Casts a byte vector to a double vector when AVX512VL is not available.
// Both operands use the legVec operand class.
// NOTE(review): presumably legVec keeps the operands in registers that are
// usable without AVX512VL -- confirm against the operand definition.
instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister bytes = $src$$XMMRegister;
    __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, bytes, enc);
  %}
  ins_pipe( pipe_slow );
%}
22292 
// Casts a short vector with at most 8 lanes to a byte vector when the
// AVX512VL+BW path is not usable: the words are masked with
// vector_short_to_byte_mask and then packed to bytes.
instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    XMMRegister result = $dst$$XMMRegister;
    // Mask each word, then pack the register with itself.
    __ vpand(result, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
    __ vpackuswb(result, result, result, 0);
  %}
  ins_pipe( pipe_slow );
%}
22307 
// Casts a 16-lane short vector to a byte vector when the AVX512VL+BW path
// is not usable: the words are masked at the source vector width, the upper
// 128 bits are extracted, and both halves are packed to bytes.
instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // The encoding follows the size of the source vector, not the result.
    const int src_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister upper  = $vtmp$$XMMRegister;
    __ vpand(result, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), src_enc, noreg);
    __ vextracti128(upper, result, 0x1);
    __ vpackuswb(result, result, upper, 0);
  %}
  ins_pipe( pipe_slow );
%}
22325 
22326 instruct vcastStoX_evex(vec dst, vec src) %{
22327   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22328             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22329   match(Set dst (VectorCastS2X src));
22330   format %{ "vector_cast_s2x $dst,$src\t!" %}
22331   ins_encode %{
22332     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22333     int src_vlen_enc = vector_length_encoding(this, $src);
22334     int vlen_enc = vector_length_encoding(this);
22335     switch (to_elem_bt) {
22336       case T_BYTE:
22337         if (!VM_Version::supports_avx512vl()) {
22338           vlen_enc = Assembler::AVX_512bit;
22339         }
22340         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22341         break;
22342       case T_INT:
22343         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22344         break;
22345       case T_FLOAT:
22346         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22347         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22348         break;
22349       case T_LONG:
22350         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22351         break;
22352       case T_DOUBLE: {
22353         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22354         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22355         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22356         break;
22357       }
22358       default:
22359         ShouldNotReachHere();
22360     }
22361   %}
22362   ins_pipe( pipe_slow );
22363 %}
22364 
22365 instruct castItoX(vec dst, vec src) %{
22366   predicate(UseAVX <= 2 &&
22367             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22368             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22369   match(Set dst (VectorCastI2X src));
22370   format %{ "vector_cast_i2x $dst,$src" %}
22371   ins_encode %{
22372     assert(UseAVX > 0, "required");
22373 
22374     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22375     int vlen_enc = vector_length_encoding(this, $src);
22376 
22377     if (to_elem_bt == T_BYTE) {
22378       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22379       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22380       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22381     } else {
22382       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22383       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22384       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22385     }
22386   %}
22387   ins_pipe( pipe_slow );
22388 %}
22389 
22390 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22391   predicate(UseAVX <= 2 &&
22392             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22393             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22394   match(Set dst (VectorCastI2X src));
22395   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22396   effect(TEMP dst, TEMP vtmp);
22397   ins_encode %{
22398     assert(UseAVX > 0, "required");
22399 
22400     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22401     int vlen_enc = vector_length_encoding(this, $src);
22402 
22403     if (to_elem_bt == T_BYTE) {
22404       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22405       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22406       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22407       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22408     } else {
22409       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22410       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22411       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22412       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22413     }
22414   %}
22415   ins_pipe( pipe_slow );
22416 %}
22417 
22418 instruct vcastItoX_evex(vec dst, vec src) %{
22419   predicate(UseAVX > 2 ||
22420             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22421   match(Set dst (VectorCastI2X src));
22422   format %{ "vector_cast_i2x $dst,$src\t!" %}
22423   ins_encode %{
22424     assert(UseAVX > 0, "required");
22425 
22426     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22427     int src_vlen_enc = vector_length_encoding(this, $src);
22428     int dst_vlen_enc = vector_length_encoding(this);
22429     switch (dst_elem_bt) {
22430       case T_BYTE:
22431         if (!VM_Version::supports_avx512vl()) {
22432           src_vlen_enc = Assembler::AVX_512bit;
22433         }
22434         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22435         break;
22436       case T_SHORT:
22437         if (!VM_Version::supports_avx512vl()) {
22438           src_vlen_enc = Assembler::AVX_512bit;
22439         }
22440         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22441         break;
22442       case T_FLOAT:
22443         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22444         break;
22445       case T_LONG:
22446         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22447         break;
22448       case T_DOUBLE:
22449         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22450         break;
22451       default:
22452         ShouldNotReachHere();
22453     }
22454   %}
22455   ins_pipe( pipe_slow );
22456 %}
22457 
22458 instruct vcastLtoBS(vec dst, vec src) %{
22459   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22460             UseAVX <= 2);
22461   match(Set dst (VectorCastL2X src));
22462   format %{ "vector_cast_l2x  $dst,$src" %}
22463   ins_encode %{
22464     assert(UseAVX > 0, "required");
22465 
22466     int vlen = Matcher::vector_length_in_bytes(this, $src);
22467     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22468     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22469                                                       : ExternalAddress(vector_int_to_short_mask());
22470     if (vlen <= 16) {
22471       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22472       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22473       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22474     } else {
22475       assert(vlen <= 32, "required");
22476       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22477       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22478       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22479       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22480     }
22481     if (to_elem_bt == T_BYTE) {
22482       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22483     }
22484   %}
22485   ins_pipe( pipe_slow );
22486 %}
22487 
22488 instruct vcastLtoX_evex(vec dst, vec src) %{
22489   predicate(UseAVX > 2 ||
22490             (Matcher::vector_element_basic_type(n) == T_INT ||
22491              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22492              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22493   match(Set dst (VectorCastL2X src));
22494   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22495   ins_encode %{
22496     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22497     int vlen = Matcher::vector_length_in_bytes(this, $src);
22498     int vlen_enc = vector_length_encoding(this, $src);
22499     switch (to_elem_bt) {
22500       case T_BYTE:
22501         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22502           vlen_enc = Assembler::AVX_512bit;
22503         }
22504         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22505         break;
22506       case T_SHORT:
22507         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22508           vlen_enc = Assembler::AVX_512bit;
22509         }
22510         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22511         break;
22512       case T_INT:
22513         if (vlen == 8) {
22514           if ($dst$$XMMRegister != $src$$XMMRegister) {
22515             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22516           }
22517         } else if (vlen == 16) {
22518           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22519         } else if (vlen == 32) {
22520           if (UseAVX > 2) {
22521             if (!VM_Version::supports_avx512vl()) {
22522               vlen_enc = Assembler::AVX_512bit;
22523             }
22524             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22525           } else {
22526             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22527             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22528           }
22529         } else { // vlen == 64
22530           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22531         }
22532         break;
22533       case T_FLOAT:
22534         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22535         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22536         break;
22537       case T_DOUBLE:
22538         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22539         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22540         break;
22541 
22542       default: assert(false, "%s", type2name(to_elem_bt));
22543     }
22544   %}
22545   ins_pipe( pipe_slow );
22546 %}
22547 
22548 instruct vcastFtoD_reg(vec dst, vec src) %{
22549   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22550   match(Set dst (VectorCastF2X src));
22551   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22552   ins_encode %{
22553     int vlen_enc = vector_length_encoding(this);
22554     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22555   %}
22556   ins_pipe( pipe_slow );
22557 %}
22558 
22559 
22560 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22561   predicate(!VM_Version::supports_avx10_2() &&
22562             !VM_Version::supports_avx512vl() &&
22563             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22564             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22565             is_integral_type(Matcher::vector_element_basic_type(n)));
22566   match(Set dst (VectorCastF2X src));
22567   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22568   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22569   ins_encode %{
22570     int vlen_enc = vector_length_encoding(this, $src);
22571     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22572     // JDK-8292878 removed the need for an explicit scratch register needed to load greater than
22573     // 32 bit addresses for register indirect addressing mode since stub constants
22574     // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
22575     // However, targets are free to increase this limit, but having a large code cache size
22576     // greater than 2G looks unreasonable in practical scenario, on the hind side with given
22577     // cap we save a temporary register allocation which in limiting case can prevent
22578     // spilling in high register pressure blocks.
22579     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22580                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22581                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22582   %}
22583   ins_pipe( pipe_slow );
22584 %}
22585 
22586 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22587   predicate(!VM_Version::supports_avx10_2() &&
22588             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22589             is_integral_type(Matcher::vector_element_basic_type(n)));
22590   match(Set dst (VectorCastF2X src));
22591   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22592   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22593   ins_encode %{
22594     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22595     if (to_elem_bt == T_LONG) {
22596       int vlen_enc = vector_length_encoding(this);
22597       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22598                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22599                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22600     } else {
22601       int vlen_enc = vector_length_encoding(this, $src);
22602       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22603                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22604                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22605     }
22606   %}
22607   ins_pipe( pipe_slow );
22608 %}
22609 
22610 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22611   predicate(VM_Version::supports_avx10_2() &&
22612             is_integral_type(Matcher::vector_element_basic_type(n)));
22613   match(Set dst (VectorCastF2X src));
22614   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22615   ins_encode %{
22616     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22617     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22618     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22619   %}
22620   ins_pipe( pipe_slow );
22621 %}
22622 
22623 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22624   predicate(VM_Version::supports_avx10_2() &&
22625             is_integral_type(Matcher::vector_element_basic_type(n)));
22626   match(Set dst (VectorCastF2X (LoadVector src)));
22627   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22628   ins_encode %{
22629     int vlen = Matcher::vector_length(this);
22630     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22631     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22632     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22633   %}
22634   ins_pipe( pipe_slow );
22635 %}
22636 
22637 instruct vcastDtoF_reg(vec dst, vec src) %{
22638   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22639   match(Set dst (VectorCastD2X src));
22640   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22641   ins_encode %{
22642     int vlen_enc = vector_length_encoding(this, $src);
22643     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22644   %}
22645   ins_pipe( pipe_slow );
22646 %}
22647 
22648 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22649   predicate(!VM_Version::supports_avx10_2() &&
22650             !VM_Version::supports_avx512vl() &&
22651             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22652             is_integral_type(Matcher::vector_element_basic_type(n)));
22653   match(Set dst (VectorCastD2X src));
22654   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22655   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22656   ins_encode %{
22657     int vlen_enc = vector_length_encoding(this, $src);
22658     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22659     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22660                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22661                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22662   %}
22663   ins_pipe( pipe_slow );
22664 %}
22665 
22666 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22667   predicate(!VM_Version::supports_avx10_2() &&
22668             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22669             is_integral_type(Matcher::vector_element_basic_type(n)));
22670   match(Set dst (VectorCastD2X src));
22671   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22672   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22673   ins_encode %{
22674     int vlen_enc = vector_length_encoding(this, $src);
22675     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22676     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22677                               ExternalAddress(vector_float_signflip());
22678     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22679                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22680   %}
22681   ins_pipe( pipe_slow );
22682 %}
22683 
22684 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22685   predicate(VM_Version::supports_avx10_2() &&
22686             is_integral_type(Matcher::vector_element_basic_type(n)));
22687   match(Set dst (VectorCastD2X src));
22688   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22689   ins_encode %{
22690     int vlen_enc = vector_length_encoding(this, $src);
22691     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22692     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22693   %}
22694   ins_pipe( pipe_slow );
22695 %}
22696 
22697 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22698   predicate(VM_Version::supports_avx10_2() &&
22699             is_integral_type(Matcher::vector_element_basic_type(n)));
22700   match(Set dst (VectorCastD2X (LoadVector src)));
22701   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22702   ins_encode %{
22703     int vlen = Matcher::vector_length(this);
22704     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22705     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22706     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22707   %}
22708   ins_pipe( pipe_slow );
22709 %}
22710 
22711 instruct vucast(vec dst, vec src) %{
22712   match(Set dst (VectorUCastB2X src));
22713   match(Set dst (VectorUCastS2X src));
22714   match(Set dst (VectorUCastI2X src));
22715   format %{ "vector_ucast $dst,$src\t!" %}
22716   ins_encode %{
22717     assert(UseAVX > 0, "required");
22718 
22719     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22720     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22721     int vlen_enc = vector_length_encoding(this);
22722     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22723   %}
22724   ins_pipe( pipe_slow );
22725 %}
22726 
22727 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22728   predicate(!VM_Version::supports_avx512vl() &&
22729             Matcher::vector_length_in_bytes(n) < 64 &&
22730             Matcher::vector_element_basic_type(n) == T_INT);
22731   match(Set dst (RoundVF src));
22732   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22733   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22734   ins_encode %{
22735     int vlen_enc = vector_length_encoding(this);
22736     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22737     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22738                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22739                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22740   %}
22741   ins_pipe( pipe_slow );
22742 %}
22743 
22744 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22745   predicate((VM_Version::supports_avx512vl() ||
22746              Matcher::vector_length_in_bytes(n) == 64) &&
22747              Matcher::vector_element_basic_type(n) == T_INT);
22748   match(Set dst (RoundVF src));
22749   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22750   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22751   ins_encode %{
22752     int vlen_enc = vector_length_encoding(this);
22753     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22754     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22755                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22756                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22757   %}
22758   ins_pipe( pipe_slow );
22759 %}
22760 
22761 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22762   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22763   match(Set dst (RoundVD src));
22764   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22765   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22766   ins_encode %{
22767     int vlen_enc = vector_length_encoding(this);
22768     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22769     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22770                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22771                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22772   %}
22773   ins_pipe( pipe_slow );
22774 %}
22775 
22776 // --------------------------------- VectorMaskCmp --------------------------------------
22777 
22778 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22779   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22780             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22781             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22782             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22783   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22784   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22785   ins_encode %{
22786     int vlen_enc = vector_length_encoding(this, $src1);
22787     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22788     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22789       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22790     } else {
22791       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22792     }
22793   %}
22794   ins_pipe( pipe_slow );
22795 %}
22796 
22797 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22798   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22799             n->bottom_type()->isa_pvectmask() == nullptr &&
22800             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22801   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22802   effect(TEMP ktmp);
22803   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22804   ins_encode %{
22805     int vlen_enc = Assembler::AVX_512bit;
22806     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22807     KRegister mask = k0; // The comparison itself is not being masked.
22808     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22809       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22810       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22811     } else {
22812       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22813       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22814     }
22815   %}
22816   ins_pipe( pipe_slow );
22817 %}
22818 
22819 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22820   predicate(n->bottom_type()->isa_pvectmask() &&
22821             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22822   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22823   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22824   ins_encode %{
22825     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22826     int vlen_enc = vector_length_encoding(this, $src1);
22827     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22828     KRegister mask = k0; // The comparison itself is not being masked.
22829     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22830       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22831     } else {
22832       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22833     }
22834   %}
22835   ins_pipe( pipe_slow );
22836 %}
22837 
22838 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22839   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22840             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22841             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22842             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22843             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22844             (n->in(2)->get_int() == BoolTest::eq ||
22845              n->in(2)->get_int() == BoolTest::lt ||
22846              n->in(2)->get_int() == BoolTest::gt)); // cond
22847   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22848   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22849   ins_encode %{
22850     int vlen_enc = vector_length_encoding(this, $src1);
22851     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22852     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22853     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22854   %}
22855   ins_pipe( pipe_slow );
22856 %}
22857 
22858 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22859   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22860             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22861             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22862             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22863             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22864             (n->in(2)->get_int() == BoolTest::ne ||
22865              n->in(2)->get_int() == BoolTest::le ||
22866              n->in(2)->get_int() == BoolTest::ge)); // cond
22867   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22868   effect(TEMP dst, TEMP xtmp);
22869   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22870   ins_encode %{
22871     int vlen_enc = vector_length_encoding(this, $src1);
22872     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22873     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22874     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22875   %}
22876   ins_pipe( pipe_slow );
22877 %}
22878 
// Unsigned integral vector compare that yields a vector (non predicate-mask)
// result. Selected when the condition is an unsigned BoolTest predicate, src1 is
// between 4 and 32 bytes long and its element type is integral.
//
// Both inputs are XORed with the constant obtained from high_bit_set() for the
// element type before being handed to vpcmpCCW.
// NOTE(review): presumably flipping the high bit lets vpcmpCCW's comparison give
// the unsigned ordering -- confirm against the MacroAssembler implementation.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp);
  ins_encode %{
    // The $constantaddress argument is kept self-contained (no encode-body locals).
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    const Assembler::Width elem_width = widthForType(elem_bt);
    const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
    const int vlen_enc = vector_length_encoding(this, $src1);
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;

    // Replicate the flip constant across the scratch register.
    if (vlen_enc != Assembler::AVX_128bit) {
      __ vbroadcastsd(scratch, flip_bit, vlen_enc, noreg);
    } else {
      __ vmovddup(scratch, flip_bit, vlen_enc, noreg);
    }
    // result = flip ^ src1, scratch = flip ^ src2, then compare the two.
    __ vpxor(result, scratch, $src1$$XMMRegister, vlen_enc);
    __ vpxor(scratch, scratch, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW(result, result, scratch, scratch, pred, elem_width, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22905 
// Integral vector compare for 64-byte sources whose result is a vector rather
// than a predicate mask. The compare writes the opmask temp $ktmp, which then
// controls a masked move (merge == false) from the vector_all_bits_set()
// address into $dst. Only T_INT and T_LONG elements are handled here; any other
// element type trips the assert.
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  effect(TEMP ktmp);
  ins_encode %{
    assert(UseAVX > 2, "required");

    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    const int vlen_enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
    const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
    const bool merge = false;
    KRegister no_mask = k0; // The comparison itself is not being masked.
    KRegister cmp_result = $ktmp$$KRegister;
    XMMRegister result = $dst$$XMMRegister;

    if (elem_bt == T_INT) {
      __ evpcmpd(cmp_result, no_mask, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vlen_enc);
      __ evmovdqul(result, cmp_result, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
    } else if (elem_bt == T_LONG) {
      __ evpcmpq(cmp_result, no_mask, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vlen_enc);
      __ evmovdquq(result, cmp_result, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
    } else {
      assert(false, "%s", type2name(elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22939 
22940 
// Integral vector compare whose result is a predicate (opmask) register.
// Dispatches on src1's element type to evpcmpb/evpcmpw/evpcmpd/evpcmpq. k0 is
// passed as the mask operand, and the signedness flag is the negation of
// Matcher::is_unsigned_booltest_pred($cond).
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");

    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    const int vlen_enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
    const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister   kdst = $dst$$KRegister;
    XMMRegister lhs  = $src1$$XMMRegister;
    XMMRegister rhs  = $src2$$XMMRegister;

    // Select the compare instruction matching the element type.
    switch (elem_bt) {
      case T_BYTE:
        __ evpcmpb(kdst, k0, lhs, rhs, pred, is_signed, vlen_enc);
        break;
      case T_SHORT:
        __ evpcmpw(kdst, k0, lhs, rhs, pred, is_signed, vlen_enc);
        break;
      case T_INT:
        __ evpcmpd(kdst, k0, lhs, rhs, pred, is_signed, vlen_enc);
        break;
      case T_LONG:
        __ evpcmpq(kdst, k0, lhs, rhs, pred, is_signed, vlen_enc);
        break;
      default:
        assert(false, "%s", type2name(elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22978 
22979 // Extract
22980 
// Extracts the int/short/byte element at constant index $idx from a vector of
// at most 16 bytes into a general purpose register via get_elem().
instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    // The element type of the source decides how get_elem() reads the lane.
    __ get_elem(Matcher::vector_element_basic_type(this, $src),
                $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22995 
// Extracts the int/short/byte element at constant index $idx from a 32- or
// 64-byte vector into a general purpose register.
//
// get_lane() is given $vtmp as a temporary and returns the XMM register that
// get_elem() subsequently reads the element from.
//
// The index operand is immU8, matching every other extract rule in this group
// (extractI, extractL, vextractL, extractF, vextractF, extractD, vextractD);
// the encoding asserts that it is below the source vector length.
instruct vextractI(rRegI dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    // First obtain the register holding the wanted lane, then pull the element out of it.
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
23013 
// Extracts the long element at constant index $idx from a vector holding at
// most two elements into a general purpose register.
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    Register    result = $dst$$Register;
    XMMRegister vector = $src$$XMMRegister;
    __ get_elem(T_LONG, result, vector, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
23026 
// Extracts the long element at constant index $idx from a vector of 4 or 8
// elements. get_lane() receives $vtmp as a temporary and returns the register
// that get_elem() then reads from.
instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister lane    = __ get_lane(T_LONG, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
23041 
// Extracts the float element at constant index $idx from a vector of at most
// four elements into an XMM register; $vtmp is passed to get_elem() as a
// temporary.
instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ get_elem(T_FLOAT, result, $src$$XMMRegister, $idx$$constant, scratch);
  %}
  ins_pipe( pipe_slow );
%}
23054 
// Extracts the float element at constant index $idx from a vector of 8 or 16
// elements. get_lane() receives $vtmp as a temporary and returns the register
// that get_elem() then reads from.
instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister lane    = __ get_lane(T_FLOAT, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
23069 
// Extracts the double element at constant index $idx from a two-element vector
// into an XMM register.
instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister result = $dst$$XMMRegister;
    XMMRegister vector = $src$$XMMRegister;
    __ get_elem(T_DOUBLE, result, vector, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
23081 
// Extracts the double element at constant index $idx from a vector of 4 or 8
// elements. get_lane() receives $vtmp as a temporary and returns the register
// that get_elem() then reads from.
instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister lane    = __ get_lane(T_DOUBLE, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
23096 
23097 // --------------------------------- Vector Blend --------------------------------------
23098 
// Vector blend for UseAVX == 0. The two-operand pblendvb takes its mask
// implicitly from xmm0, so $tmp is restricted to the rxmm0 operand class and
// the mask is copied into it first unless it already lives there.
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP tmp);
  format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    XMMRegister implicit_mask = $tmp$$XMMRegister;
    XMMRegister mask_reg      = $mask$$XMMRegister;
    if (mask_reg != implicit_mask) {
      __ movdqu(implicit_mask, mask_reg);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}
23114 
// AVX vector blend for integral element types, vectors of at most 32 bytes and
// a vector (non predicate) mask, used when EnableX86ECoreOpts is off.
// Emits a single vpblendvb.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister selector = $mask$$XMMRegister;
    __ vpblendvb(result, $src1$$XMMRegister, $src2$$XMMRegister, selector,
                 vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23128 
// AVX vector blend for non-integral element types, vectors of at most 32 bytes
// and a vector (non predicate) mask, used when EnableX86ECoreOpts is off.
// Emits a single vblendvps.
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister selector = $mask$$XMMRegister;
    __ vblendvps(result, $src1$$XMMRegister, $src2$$XMMRegister, selector,
                 vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23142 
// AVX vector blend used when EnableX86ECoreOpts is on (vectors of at most 32
// bytes, vector mask). Instead of a blend instruction it emits a three
// instruction vpandn / vpand / vpor sequence with $vtmp holding the vpandn
// result.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP vtmp, TEMP dst);
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister scratch  = $vtmp$$XMMRegister;
    XMMRegister selector = $mask$$XMMRegister;

    __ vpandn(scratch, selector, $src1$$XMMRegister, vlen_enc);
    __ vpand(result, selector, $src2$$XMMRegister, vlen_enc);
    __ vpor(result, result, scratch, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23158 
// 64-byte vector blend with a vector (non predicate) mask.
//
// The vector mask is first turned into an opmask: evpcmp compares $mask for
// equality against the vector_all_bits_set() constant and writes $ktmp. That
// opmask then drives evpblend (merge == true) of $src1 and $src2 into $dst.
//
// The format string names the allocated $ktmp temp rather than a fixed
// register, since the opmask temp is chosen by the register allocator.
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Lanes whose mask element is all ones are selected in $ktmp.
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23173 
23174 
// Vector blend whose mask is already a predicate (opmask) register. Subword
// element types are only accepted when AVX512BW is supported. The opmask is
// passed straight to evpblend (merge == true); no temporary register is used,
// so the format string does not mention one.
instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23188 
23189 // --------------------------------- ABS --------------------------------------
23190 // a = |a|
// Packed byte absolute value: pabsb for up to 16 elements, otherwise vpabsb
// with the vector length encoding of this node.
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB  src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (Matcher::vector_length(this) > 16) {
      __ vpabsb(result, input, vector_length_encoding(this));
    } else {
      __ pabsb(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
23205 
// Packed short absolute value: pabsw for up to 8 elements, otherwise vpabsw
// with the vector length encoding of this node.
instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS  src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (Matcher::vector_length(this) > 8) {
      __ vpabsw(result, input, vector_length_encoding(this));
    } else {
      __ pabsw(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
23220 
// Packed int absolute value: pabsd for up to 4 elements, otherwise vpabsd with
// the vector length encoding of this node.
instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI  src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (Matcher::vector_length(this) > 4) {
      __ vpabsd(result, input, vector_length_encoding(this));
    } else {
      __ pabsd(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
23235 
// Packed long absolute value using evpabsq (asserts UseAVX > 2). When AVX512VL
// is not supported the instruction is always emitted with the 512-bit length
// encoding, regardless of this node's own vector length.
instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL  src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int node_enc = vector_length_encoding(this);
    const int vlen_enc = VM_Version::supports_avx512vl() ? node_enc : Assembler::AVX_512bit;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23249 
23250 // --------------------------------- ABSNEG --------------------------------------
23251 
// Packed float abs/neg for every vector length except 4 (which the one-operand
// rule vabsneg4F covers). The ideal opcode (AbsVF or NegVF) is forwarded to
// vabsnegf(); two-element vectors use the overload without a length encoding,
// 8- and 16-element vectors the one with it.
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  ins_cost(150);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (lane_count != 2) {
      assert(lane_count == 8 || lane_count == 16, "required");
      __ vabsnegf(ideal_op, result, input, vector_length_encoding(this));
    } else {
      __ vabsnegf(ideal_op, result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
23271 
// Packed float abs/neg for four-element vectors, in place: $dst is both the
// input and the output of vabsnegf(). The ideal opcode (AbsVF or NegVF) is
// forwarded to vabsnegf().
instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  ins_cost(150);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_encode %{
    XMMRegister in_out = $dst$$XMMRegister;
    __ vabsnegf(this->ideal_Opcode(), in_out, in_out);
  %}
  ins_pipe( pipe_slow );
%}
23284 
// Packed double abs/neg. The ideal opcode (AbsVD or NegVD) is forwarded to
// vabsnegd(); two-element vectors use the overload without a length encoding,
// all other lengths the one with it.
instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD  src));
  match(Set dst (NegVD  src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (Matcher::vector_length(this) != 2) {
      __ vabsnegd(ideal_op, result, input, vector_length_encoding(this));
    } else {
      __ vabsnegd(ideal_op, result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
23301 
23302 //------------------------------------- VectorTest --------------------------------------------
23303 
// VectorTest on vector operands shorter than 16 bytes. Delegates to
// vectortest(), handing it $vtmp as a temporary together with src1's element
// type and length in bytes.
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src1);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ vectortest(elem_bt, $src1$$XMMRegister, $src2$$XMMRegister, scratch, src_bytes);
  %}
  ins_pipe( pipe_slow );
%}
23316 
// VectorTest on vector operands of 16 bytes or more. Delegates to vectortest()
// with xnoreg in the temporary slot, i.e. no scratch register is supplied.
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16  $src1, $src2\n\t" %}
  ins_encode %{
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src1);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    __ vectortest(elem_bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, src_bytes);
  %}
  ins_pipe( pipe_slow );
%}
23328 
// VectorTest on opmask operands with fewer than 8 lanes (or exactly 8 when
// AVX512DQ is unavailable), for the BoolTest::overflow predicate.
// The mask is copied into a GPR, reduced to its low masklen bits and compared
// against the value with all masklen bits set; the flags of that cmpl are the
// result. Only $src1 is read by the encoding.
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    const int all_lanes = (1 << masklen) - 1;
    Register bits = $tmp$$Register;

    __ kmovwl(bits, $src1$$KRegister);
    __ andl(bits, all_lanes);
    __ cmpl(bits, all_lanes);
  %}
  ins_pipe( pipe_slow );
%}
23344 
// VectorTest on opmask operands with fewer than 8 lanes (or exactly 8 when
// AVX512DQ is unavailable), for the BoolTest::ne predicate.
// The mask is copied into a GPR and ANDed with the value that has the low
// masklen bits set; the flags produced by that andl are the result. Only $src1
// is read by the encoding.
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    const int all_lanes = (1 << masklen) - 1;
    Register bits = $tmp$$Register;

    __ kmovwl(bits, $src1$$KRegister);
    __ andl(bits, all_lanes);
  %}
  ins_pipe( pipe_slow );
%}
23359 
// VectorTest on opmask operands with 16 or more lanes, or exactly 8 lanes when
// AVX512DQ is supported. Emits kortest() of $src1 with itself for the given
// mask length; $src2 is not read by the encoding.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8  $src1, $src2\n\t" %}
  ins_encode %{
    KRegister mask_reg = $src1$$KRegister;
    __ kortest(Matcher::vector_length(this, $src1), mask_reg, mask_reg);
  %}
  ins_pipe( pipe_slow );
%}
23371 
23372 //------------------------------------- LoadMask --------------------------------------------
23373 
// VectorLoadMask producing a vector (non predicate) result, used when
// AVX512VL+BW is not available. Delegates to load_vector_mask() with this
// node's length in bytes and element type; the trailing argument is true.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  effect(TEMP dst);
  ins_encode %{
    const BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
    const int dst_bytes = Matcher::vector_length_in_bytes(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, dst_bytes, dst_elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
23386 
// VectorLoadMask producing a predicate (opmask) result when AVX512VL+BW is not
// available. Calls the opmask flavour of load_vector_mask() with $xtmp as a
// temporary, `true` as the flag argument and a fixed 512-bit length encoding.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  effect(TEMP xtmp);
  ins_encode %{
    KRegister   kdst    = $dst$$KRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    __ load_vector_mask(kdst, $src$$XMMRegister, scratch, true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23398 
// VectorLoadMask producing a predicate (opmask) result when AVX512VL+BW is
// available. Calls the opmask flavour of load_vector_mask() with $xtmp as a
// temporary, `false` as the flag argument and the length encoding derived from
// the input node in(1).
instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  effect(TEMP xtmp);
  ins_encode %{
    const int src_vlen_enc = vector_length_encoding(in(1));
    KRegister   kdst    = $dst$$KRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    __ load_vector_mask(kdst, $src$$XMMRegister, scratch, false, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23411 
23412 //------------------------------------- StoreMask --------------------------------------------
23413 
// VectorStoreMask for 1-byte mask elements held in a vector register (fewer
// than 64 elements). A single byte-wise absolute value does the conversion:
// pabsb when there are at most 16 elements and UseAVX <= 2, otherwise vpabsb
// with the source's length encoding.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (lane_count > 16 || UseAVX > 2) {
      assert(UseAVX > 0, "required");
      __ vpabsb(result, input, vector_length_encoding(this, $src));
    } else {
      assert(UseSSE >= 3, "required");
      __ pabsb(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
23431 
// VectorStoreMask for 2-byte mask elements held in a vector register (at most
// 16 elements).
//  - up to 8 elements: pabsw, then packuswb against a zeroed $xtmp;
//  - otherwise: the upper 128 bits of $src are extracted into $dst, packed
//    together with $src by vpacksswb, and the result goes through vpabsb.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (lane_count > 8) {
      assert(UseAVX > 0, "required");
      const int vlen_enc = Assembler::AVX_128bit;
      __ vextracti128(result, input, 0x1);
      __ vpacksswb(result, input, result, vlen_enc);
      __ vpabsb(result, result, vlen_enc);
    } else {
      assert(UseSSE >= 3, "required");
      XMMRegister zero = $xtmp$$XMMRegister;
      __ pxor(zero, zero);
      __ pabsw(result, input);
      __ packuswb(result, zero);
    }
  %}
  ins_pipe( pipe_slow );
%}
23454 
// VectorStoreMask for 4-byte mask elements held in a vector register, for
// UseAVX <= 2 and at most 8 elements.
//  - up to 4 elements: pabsd, then packusdw and packuswb against a zeroed $xtmp;
//  - otherwise: the upper 128 bits of $src are extracted into $dst, packed
//    with $src by vpackssdw, packed again with a zeroed $xtmp by vpacksswb,
//    and the result goes through vpabsb.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister zero   = $xtmp$$XMMRegister;
    if (lane_count > 4) {
      assert(UseAVX > 0, "required");
      const int vlen_enc = Assembler::AVX_128bit;
      __ vpxor(zero, zero, zero, vlen_enc);
      __ vextracti128(result, input, 0x1);
      __ vpackssdw(result, input, result, vlen_enc);
      __ vpacksswb(result, result, zero, vlen_enc);
      __ vpabsb(result, result, vlen_enc);
    } else {
      assert(UseSSE >= 3, "required");
      __ pxor(zero, zero);
      __ pabsd(result, input);
      __ packusdw(result, zero);
      __ packuswb(result, zero);
    }
  %}
  ins_pipe( pipe_slow );
%}
23480 
// VectorStoreMask for 8-byte mask elements, two-element vectors, UseAVX <= 2.
// pshufd with immediate 0x8 gathers dwords of $src into $dst, pabsd is applied,
// and the result is narrowed by packusdw and packuswb against a zeroed $xtmp.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    assert(UseSSE >= 3, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister zero   = $xtmp$$XMMRegister;

    __ pxor(zero, zero);
    __ pshufd(result, $src$$XMMRegister, 0x8);
    __ pabsd(result, result);
    __ packusdw(result, zero);
    __ packuswb(result, zero);
  %}
  ins_pipe( pipe_slow );
%}
23496 
// VectorStoreMask for 8-byte mask elements, four-element vectors, UseAVX <= 2.
// A 256-bit vshufps (imm 0x88) of $src with itself is followed by extracting
// the upper 128 bits into $vtmp and blending them into $dst (vblendps, 0xC).
// $vtmp is then zeroed and used as the second operand of vpackssdw and
// vpacksswb, and the result goes through vpabsb. Everything after the shuffle
// uses the 128-bit length encoding.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP vtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  ins_encode %{
    const int enc128 = Assembler::AVX_128bit;
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;

    __ vshufps(result, input, input, 0x88, Assembler::AVX_256bit);
    __ vextracti128(scratch, result, 0x1);
    __ vblendps(result, result, scratch, 0xC, enc128);
    __ vpxor(scratch, scratch, scratch, enc128);
    __ vpackssdw(result, result, scratch, enc128);
    __ vpacksswb(result, result, scratch, enc128);
    __ vpabsb(result, result, enc128);
  %}
  ins_pipe( pipe_slow );
%}
23514 
// VectorStoreMask for 4-byte mask elements held in a vector register when
// UseAVX > 2. evpmovdb narrows $src into $dst and vpabsb is applied to the
// result. Without AVX512VL the narrowing is emitted with the 512-bit length
// encoding instead of the source's own.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int dst_enc = vector_length_encoding(this);
    const int src_node_enc = vector_length_encoding(this, $src);
    const int src_enc = VM_Version::supports_avx512vl() ? src_node_enc : Assembler::AVX_512bit;
    XMMRegister result = $dst$$XMMRegister;

    __ evpmovdb(result, $src$$XMMRegister, src_enc);
    __ vpabsb(result, result, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
23530 
// VectorStoreMask for 8-byte mask elements held in a vector register when
// UseAVX > 2. evpmovqb narrows $src into $dst and vpabsb is applied to the
// result. Without AVX512VL the narrowing is emitted with the 512-bit length
// encoding instead of the source's own.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int dst_enc = vector_length_encoding(this);
    const int src_node_enc = vector_length_encoding(this, $src);
    const int src_enc = VM_Version::supports_avx512vl() ? src_node_enc : Assembler::AVX_512bit;
    XMMRegister result = $dst$$XMMRegister;

    __ evpmovqb(result, $src$$XMMRegister, src_enc);
    __ vpabsb(result, result, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
23546 
// VectorStoreMask from a predicate (opmask) register when AVX512VL+BW is not
// available. Asserts that the mask operand's vector length is 64 bytes, does a
// masked 512-bit evmovdqul (merge == false) from the
// vector_int_mask_cmp_bits() address into $dst, and narrows it in place with
// evpmovdb.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst);
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    XMMRegister result = $dst$$XMMRegister;

    __ evmovdqul(result, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb(result, result, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23560 
// VectorStoreMask from a predicate (opmask) register when AVX512VL+BW is
// available: evpmovm2b moves the mask into $dst and vpabsb is applied to the
// result, both with this node's length encoding.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst);
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;

    __ evpmovm2b(result, $mask$$KRegister, vlen_enc);
    __ vpabsb(result, result, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23573 
// VectorMaskCast on a predicate (opmask) register. Input and output share
// $dst, the cost is zero and nothing is emitted.
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // Intentionally no code: the cast happens in place.
  %}
  ins_pipe(empty);
%}
23583 
// VectorMaskCast on a vector register when source and destination have the
// same length in bytes. Input and output share $dst, the cost is zero and
// nothing is emitted.
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // Intentionally no code: the cast happens in place.
  %}
  ins_pipe(empty);
%}
23594 
// VectorMaskCast on vector registers when source and destination differ in
// length in bytes. Delegates to vector_mask_cast() with the destination and
// source element types and this node's element count.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    const BasicType to_bt   = Matcher::vector_element_basic_type(this);
    const BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
    const int lane_count = Matcher::vector_length(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, to_bt, from_bt, lane_count);
  %}
  ins_pipe(pipe_slow);
%}
23607 
23608 //-------------------------------- Load Iota Indices ----------------------------------
23609 
// VectorLoadConst with a zero immediate: loads the iota index constant for
// the node's element type and size in bytes via load_iota_indices.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
     BasicType elem_bt = Matcher::vector_element_basic_type(this);
     int size_in_bytes = Matcher::vector_length_in_bytes(this);
     __ load_iota_indices($dst$$XMMRegister, size_in_bytes, elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
23620 
// PopulateIndex with an int start value and a step fixed at 1 (immI_1):
// broadcast the start value, load the iota indices, and add the two.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
     assert($src2$$constant == 1, "required");
     XMMRegister result = $dst$$XMMRegister;
     XMMRegister splat  = $vtmp$$XMMRegister;
     BasicType bt       = Matcher::vector_element_basic_type(this);
     int enc            = vector_length_encoding(this);
     int size_in_bytes  = Matcher::vector_length_in_bytes(this);
     // splat = start value in every lane
     __ vpbroadcast(bt, splat, $src1$$Register, enc);
     // result = iota indices + start value
     __ load_iota_indices(result, size_in_bytes, bt);
     __ vpadd(bt, result, result, splat, enc);
  %}
  ins_pipe( pipe_slow );
%}
23636 
// PopulateIndex with a long start value and a step fixed at 1 (immI_1):
// broadcast the start value, load the iota indices, and add the two.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
     assert($src2$$constant == 1, "required");
     XMMRegister result = $dst$$XMMRegister;
     XMMRegister splat  = $vtmp$$XMMRegister;
     BasicType bt       = Matcher::vector_element_basic_type(this);
     int enc            = vector_length_encoding(this);
     int size_in_bytes  = Matcher::vector_length_in_bytes(this);
     // splat = start value in every lane
     __ vpbroadcast(bt, splat, $src1$$Register, enc);
     // result = iota indices + start value
     __ load_iota_indices(result, size_in_bytes, bt);
     __ vpadd(bt, result, result, splat, enc);
  %}
  ins_pipe( pipe_slow );
%}
23652 
23653 //-------------------------------- Rearrange ----------------------------------
23654 
23655 // LoadShuffle/Rearrange for Byte
// Byte VectorRearrange for vectors with fewer than 32 lanes: one in-place
// pshufb using the shuffle vector as the control operand.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister data    = $dst$$XMMRegister;
    XMMRegister control = $shuffle$$XMMRegister;
    __ pshufb(data, control);
  %}
  ins_pipe( pipe_slow );
%}
23667 
// Byte VectorRearrange for exactly 32 lanes without AVX512_VBMI. Both the
// original source and a copy with its 128-bit lanes swapped are shuffled;
// a blend mask derived from the shuffle picks, per byte, which of the two
// results to keep.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int enc = Assembler::AVX_256bit;
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    XMMRegister control = $shuffle$$XMMRegister;
    XMMRegister swapped = $vtmp1$$XMMRegister;
    XMMRegister blend   = $vtmp2$$XMMRegister;

    // swapped = input with its two 128-bit lanes exchanged
    __ vperm2i128(swapped, input, input, 1);
    // Entries that come from the other 128-bit lane
    __ vpshufb(swapped, swapped, control, enc);
    // Entries that come from the same 128-bit lane
    __ vpshufb(result, input, control, enc);
    // Blend mask: high bit set for entries sourced from the other lane
    __ vpaddb(blend, control, ExternalAddress(vector_byte_shufflemask()), enc, noreg);
    // Merge the two partial results
    __ vpblendvb(result, result, swapped, blend, enc);
  %}
  ins_pipe( pipe_slow );
%}
23689 
23690 
// Byte VectorRearrange for more than 32 lanes without AVX512_VBMI. The whole
// sequence is delegated to the macro assembler's rearrange_bytes, which is
// handed three vector temporaries, one opmask temporary and one GPR temporary.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  // The "\t!" separator now precedes the TEMP note, matching the sibling rearrange rules.
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23705 
// Byte VectorRearrange for 32 or more lanes when AVX512_VBMI is available:
// a single vpermb with the shuffle vector as the index operand.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    XMMRegister indices = $shuffle$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, indices, input, enc);
  %}
  ins_pipe( pipe_slow );
%}
23717 
23718 // LoadShuffle/Rearrange for Short
23719 
// VectorLoadShuffle for short elements when AVX512BW is not available.
// Only a byte shuffle instruction exists on these platforms, so the short
// shuffle mask is converted into a byte shuffle mask. There is one code path
// for AVX and one for plain SSE.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister scaled = $vtmp$$XMMRegister;
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX != 0) {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int enc = vector_length_encoding(this);
      // scaled = 2 * index, i.e. the byte index of each short
      __ vpsllw(scaled, input, 1, enc);

      // Place a copy of the byte index in both bytes of every short
      __ vpsllw(result, scaled, 8, enc);
      __ vpor(result, result, scaled, enc);

      // Add the constant that turns the second copy into the alternate byte index
      __ vpaddb(result, result, ExternalAddress(vector_short_shufflemask()), enc, noreg);
    } else {
      assert(vlen_in_bytes <= 16, "required");
      // scaled = 2 * index, i.e. the byte index of each short
      __ movdqu(scaled, input);
      __ psllw(scaled, 1);

      // Place a copy of the byte index in both bytes of every short
      __ movdqu(result, scaled);
      __ psllw(result, 8);
      __ por(result, scaled);

      // Add the constant that turns the second copy into the alternate byte index
      __ movdqu(scaled, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb(result, scaled);
    }
  %}
  ins_pipe( pipe_slow );
%}
23760 
// Short VectorRearrange for at most 8 lanes without AVX512BW: one in-place
// pshufb using the shuffle vector as the control operand.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister data    = $dst$$XMMRegister;
    XMMRegister control = $shuffle$$XMMRegister;
    __ pshufb(data, control);
  %}
  ins_pipe( pipe_slow );
%}
23772 
// Short VectorRearrange for exactly 16 lanes without AVX512BW. Both the
// original source and a copy with its 128-bit lanes swapped are byte-shuffled;
// a blend mask derived from the shuffle picks, per byte, which result to keep.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int enc = Assembler::AVX_256bit;
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    XMMRegister control = $shuffle$$XMMRegister;
    XMMRegister swapped = $vtmp1$$XMMRegister;
    XMMRegister blend   = $vtmp2$$XMMRegister;

    // swapped = input with its two 128-bit lanes exchanged
    __ vperm2i128(swapped, input, input, 1);
    // Entries that come from the other 128-bit lane
    __ vpshufb(swapped, swapped, control, enc);
    // Entries that come from the same 128-bit lane
    __ vpshufb(result, input, control, enc);
    // Blend mask: high bit set for entries sourced from the other lane
    __ vpaddb(blend, control, ExternalAddress(vector_byte_shufflemask()), enc, noreg);
    // Merge the two partial results
    __ vpblendvb(result, result, swapped, blend, enc);
  %}
  ins_pipe( pipe_slow );
%}
23794 
// Short VectorRearrange when AVX512BW is available: a single vpermw.
// Without AVX512VL the instruction is always emitted with the 512-bit
// encoding, whatever the node's own vector length.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int node_enc = vector_length_encoding(this);
    int enc = VM_Version::supports_avx512vl() ? node_enc : Assembler::AVX_512bit;
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23809 
23810 // LoadShuffle/Rearrange for Integer and Float
23811 
// VectorLoadShuffle for 4-lane int/float vectors on plain SSE (UseAVX == 0).
// Only a byte shuffle instruction exists on these platforms, so the int
// shuffle mask is converted into a byte shuffle mask.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister work   = $vtmp$$XMMRegister;

    // Step 1: duplicate every shuffle entry and multiply it by 4
    __ movdqu(work, $src$$XMMRegister);
    __ pshuflw(work, work, 0xA0);
    __ pshufhw(work, work, 0xA0);
    __ psllw(work, 2);

    // Step 2: duplicate once more so there are 4 copies of the byte index
    __ movdqu(result, work);
    __ psllw(result, 8);
    __ por(work, result);

    // Step 3: add 3,2,1,0 to obtain the alternate byte indices
    __ movdqu(result, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb(result, work);
  %}
  ins_pipe( pipe_slow );
%}
23841 
// Int/float VectorRearrange on plain SSE (UseAVX == 0): one in-place pshufb
// using the shuffle vector as the control operand.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister data    = $dst$$XMMRegister;
    XMMRegister control = $shuffle$$XMMRegister;
    __ pshufb(data, control);
  %}
  ins_pipe( pipe_slow );
%}
23853 
// Int/float VectorRearrange when AVX is enabled; the instruction selection is
// left to the macro assembler's vector_rearrange_int_float.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    __ vector_rearrange_int_float(elem_bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23866 
23867 // LoadShuffle/Rearrange for Long and Double
23868 
// VectorLoadShuffle for long/double vectors with fewer than 8 lanes when
// AVX512VL is not available. Only a double word shuffle instruction exists on
// these platforms, so the long shuffle mask is converted into a double word
// shuffle mask.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister scaled = $vtmp$$XMMRegister;
    int enc = vector_length_encoding(this);

    // scaled = 2 * index, i.e. the double word index of each long
    __ vpsllq(scaled, $src$$XMMRegister, 1, enc);

    // Place a copy of the double word index in both halves of every quad word
    __ vpsllq(result, scaled, 32, enc);
    __ vpor(result, result, scaled, enc);

    // Add the constant that turns the second copy into the alternate double word index
    __ vpaddd(result, result, ExternalAddress(vector_long_shufflemask()), enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23894 
// Long/double VectorRearrange for fewer than 8 lanes when AVX512VL is not
// available: a single vpermd with the shuffle vector as the index operand.
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    XMMRegister indices = $shuffle$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, indices, input, enc);
  %}
  ins_pipe( pipe_slow );
%}
23908 
// Long/double VectorRearrange for 8 lanes, or for any lane count when
// AVX512VL is available: a single vpermq. A 128-bit node is emitted with the
// 256-bit encoding instead.
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int node_enc = vector_length_encoding(this);
    // Widen a 128-bit request to 256 bits; other encodings pass through unchanged.
    int enc = (node_enc == Assembler::AVX_128bit) ? Assembler::AVX_256bit : node_enc;
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23925 
23926 // --------------------------------- FMA --------------------------------------
23927 // a * b + c
23928 
// Packed float fused multiply-add with all operands in registers:
// c = a * b + c, where c is both the addend and the result.
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF  c (Binary a b)));
  ins_cost(150);
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    XMMRegister acc = $c$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vfmaf(acc, $a$$XMMRegister, $b$$XMMRegister, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23940 
// Packed float fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only selected when the first input of the node is
// wider than 8 bytes.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF  c (Binary a (LoadVector b))));
  ins_cost(150);
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    XMMRegister acc = $c$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vfmaf(acc, $a$$XMMRegister, $b$$Address, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23953 
// Packed double fused multiply-add with all operands in registers:
// c = a * b + c, where c is both the addend and the result.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD  c (Binary a b)));
  ins_cost(150);
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    XMMRegister acc = $c$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vfmad(acc, $a$$XMMRegister, $b$$XMMRegister, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23965 
// Packed double fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only selected when the first input of the node is
// wider than 8 bytes.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD  c (Binary a (LoadVector b))));
  ins_cost(150);
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    XMMRegister acc = $c$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vfmad(acc, $a$$XMMRegister, $b$$Address, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23978 
23979 // --------------------------------- Vector Multiply Add --------------------------------------
23980 
// MulAddVS2VI on plain SSE (UseAVX == 0): two-operand pmaddwd, so dst is both
// the first input and the result.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    XMMRegister acc   = $dst$$XMMRegister;
    XMMRegister other = $src1$$XMMRegister;
    __ pmaddwd(acc, other);
  %}
  ins_pipe( pipe_slow );
%}
23990 
// MulAddVS2VI when AVX is enabled: three-operand vpmaddwd with a separate
// destination register.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
24001 
24002 // --------------------------------- Vector Multiply Add Add ----------------------------------
24003 
// Fuses AddVI(MulAddVS2VI(src1, src2), dst) into one evpdpwssd when
// AVX512_VNNI is available; dst is both the addend and the result.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  ins_cost(10);
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister acc = $dst$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ evpdpwssd(acc, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
24016 
24017 // --------------------------------- PopCount --------------------------------------
24018 
// Unmasked PopCountVI / PopCountVL for element types accepted by
// is_vector_popcount_predicate. The element type and vector length are taken
// from the source operand; k0 is passed as the mask register and the merge
// flag is true.
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    // The unused ideal-opcode local was dropped; nothing here depends on the opcode.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24032 
// Masked PopCountVI / PopCountVL for element types accepted by
// is_vector_popcount_predicate. The source is first copied into dst, then the
// popcount is applied under the opmask with the merge flag set to true.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    BasicType elem_bt  = Matcher::vector_element_basic_type(this, $src);
    int enc            = vector_length_encoding(this, $src);
    // Seed dst with the source values before the masked operation
    __ evmovdquq(result, input, enc);
    __ vector_popcount_integral_evex(elem_bt, result, input, $mask$$KRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
24046 
// PopCountVI / PopCountVL for element types rejected by
// is_vector_popcount_predicate. The work is delegated to the macro
// assembler's vector_popcount_integral, which gets two vector temporaries and
// one GPR temporary.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    // The unused ideal-opcode local was dropped; nothing here depends on the opcode.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24062 
24063 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24064 
// CountTrailingZerosV for the element type / vector size combinations
// accepted by is_clz_non_subword_predicate_evex. Only one vector temporary is
// needed here, so the three unused vector temp slots of the helper are
// xnoreg and the mask slot is k0.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  // The "\t!" separator now precedes the TEMP note, matching the AVX variant of this rule.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24080 
// CountTrailingZerosV for short elements when AVX512CD is available and
// either AVX512VL is supported or the vector is 64 bytes wide. Three vector
// temporaries are used; the remaining vector temp slot of the helper is
// xnoreg and the mask slot is k0.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  // The "\t!" separator now precedes the TEMP note, matching the AVX variant of this rule.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24097 
// CountTrailingZerosV for byte elements when AVX512VL+BW is available. This
// variant supplies all four vector temporaries plus a real opmask temporary
// to the helper.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  // The "\t!" separator now precedes the TEMP note, matching the AVX variant of this rule.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24113 
// CountTrailingZerosV when AVX512VL is not available and the source vector is
// narrower than 64 bytes; delegated to vector_count_trailing_zeros_avx with
// three vector temporaries and one GPR temporary.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int enc           = vector_length_encoding(this, $src);
    XMMRegister tmp_a = $xtmp1$$XMMRegister;
    XMMRegister tmp_b = $xtmp2$$XMMRegister;
    XMMRegister tmp_c = $xtmp3$$XMMRegister;
    __ vector_count_trailing_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       tmp_a, tmp_b, tmp_c, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
24127 
24128 
24129 // --------------------------------- Bitwise Ternary Logic ----------------------------------
24130 
// MacroLogicV with all three inputs in registers: one vpternlogd whose 8-bit
// immediate func selects the logic function. dst is the first input and the
// result.
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    XMMRegister second = $src2$$XMMRegister;
    XMMRegister third  = $src3$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, second, third, enc);
  %}
  ins_pipe( pipe_slow );
%}
24141 
// MacroLogicV with the third input folded in as a memory operand. Only
// selected when the first input of the first Binary is wider than 8 bytes.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    XMMRegister second = $src2$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, second, $src3$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
24153 
24154 // --------------------------------- Rotation Operations ----------------------------------
// RotateLeftV / RotateRightV by an 8-bit immediate. The ideal opcode tells
// vprotate_imm which direction to rotate.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc           = vector_length_encoding(this);
    int rotate_opc    = this->ideal_Opcode();
    __ vprotate_imm(rotate_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, enc);
  %}
  ins_pipe( pipe_slow );
%}
24167 
// RotateLeftV / RotateRightV by per-lane counts held in a vector register.
// The ideal opcode tells vprotate_var which direction to rotate.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int enc           = vector_length_encoding(this);
    int rotate_opc    = this->ideal_Opcode();
    __ vprotate_var(rotate_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
24180 
24181 // ---------------------------------- Masked Operations ------------------------------------
// LoadVectorMasked when the mask input is not typed as a predicate-register
// mask, so the mask lives in a vector register; emitted through vmovmask.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    int enc           = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vmovmask(elem_bt, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
24193 
24194 
// LoadVectorMasked when the mask input is typed as a predicate-register mask;
// emitted through evmovdqu with the merge flag set to false.
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    int enc           = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmovdqu(elem_bt, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, enc);
  %}
  ins_pipe( pipe_slow );
%}
24206 
// StoreVectorMasked when the mask input is not typed as a predicate-register
// mask, so the mask lives in a vector register; emitted through vmovmask.
// Element type and vector length are taken from the node feeding the src
// operand, not from this store node.
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    int enc           = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ vmovmask(elem_bt, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
24219 
// StoreVectorMasked when the mask input is typed as a predicate-register
// mask; emitted through evmovdqu with the merge flag set to true. Element
// type and vector length are taken from the node feeding the src operand,
// not from this store node.
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int enc           = vector_length_encoding(this, $src);
    __ evmovdqu(elem_bt, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
24232 
// VerifyVectorAlignment: tests the address against the immediate mask and
// stops the VM with a diagnostic message if any masked bit is set. The
// address register passes through unchanged; the flags register is killed.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label aligned;
    // All masked address bits zero => the access is aligned, skip the stop.
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, aligned);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(aligned);
  %}
  ins_pipe(pipe_slow);
%}
24247 
// VectorCmpMasked: compares src1 and src2 for equality under the given mask.
// dst is preset to -1 and stays -1 when kortest sets the carry flag, i.e.
// when the OR of the inverted mask and the compare result is all ones.
// Otherwise dst receives the number of trailing zero bits of the inverted
// compare result, which is the position of the lowest bit that is clear in it.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Register  result       = $dst$$Register;
    KRegister active       = $mask$$KRegister;
    KRegister equal_lanes  = $ktmp1$$KRegister;
    KRegister inactive     = $ktmp2$$KRegister;
    BasicType bt           = Matcher::vector_element_basic_type(this, $src1);
    int enc                = vector_length_encoding(this, $src1);
    Label finished;

    // inactive = ~mask
    __ knotql(inactive, active);
    // Default answer: -1
    __ mov64(result, -1L);
    // equal_lanes = masked equality compare of the two sources
    __ evpcmp(bt, equal_lanes, active, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, enc);
    // Carry is set when (inactive | equal_lanes) is all ones
    __ kortestql(inactive, equal_lanes);
    __ jccb(Assembler::carrySet, finished);
    // Otherwise count the trailing zero bits of ~equal_lanes
    __ kmovql(result, equal_lanes);
    __ notq(result);
    __ tzcntq(result, result);
    __ bind(finished);
  %}
  ins_pipe( pipe_slow );
%}
24272 
24273 
// Generates an opmask in dst from a length held in a general register.
// All of the work is delegated to genmask; temp is a scratch register and
// the flags register is killed.
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  effect(TEMP temp, KILL cr);
  ins_encode %{
    __ genmask($dst$$KRegister,
               $len$$Register,
               $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24283 
// Generates an opmask in dst from a compile-time constant length.
// A non-positive length yields an all-zero mask; otherwise the low len bits
// (right_n_bits) are materialized in temp and moved into dst.
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    if ($len$$constant <= 0) {
      // Nothing selected: clear dst by xor-ing it with itself.
      __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
    } else {
      // Build the bit pattern in a general register, then copy it over.
      __ mov64($temp$$Register, right_n_bits($len$$constant));
      __ kmovql($dst$$KRegister, $temp$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}
24298 
// VectorMaskToLong for masks held in an opmask register (input type is a
// predicate vector mask). dst is passed to vector_mask_operation both as the
// destination and as the temporary register argument.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int vec_len_enc = vector_length_encoding(this, $mask);
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the per-lane byte size.
    int mask_bytes = lane_count * type2aelembytes(lane_bt);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$KRegister,
                             $dst$$Register, lane_count, mask_bytes, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24315 
// VectorMaskToLong for masks held in a vector register (input type is not a
// predicate vector mask). xtmp is a vector scratch register; dst doubles as
// the general-purpose temporary handed to vector_mask_operation.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int vec_len_enc = vector_length_encoding(this, $mask);
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, lane_count, lane_bt, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24331 
// VectorMaskToLong applied directly to the input of a VectorStoreMask, for
// masks that are not predicate vector masks. The size operand is part of the
// match rule only and is not used by the encoding.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int vec_len_enc = vector_length_encoding(this, $mask);
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    // dst is passed both as the result and as the general-purpose temporary.
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, lane_count, lane_bt, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24347 
// VectorMaskTrueCount for masks held in an opmask register (input type is a
// predicate vector mask). tmp is a general-purpose scratch register.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int vec_len_enc = vector_length_encoding(this, $mask);
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the per-lane byte size.
    int mask_bytes = lane_count * type2aelembytes(lane_bt);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_count, mask_bytes, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24364 
// VectorMaskTrueCount for masks held in a vector register (input type is not
// a predicate vector mask). Uses one general-purpose and one vector scratch
// register.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int vec_len_enc = vector_length_encoding(this, $mask);
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_bt, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24380 
// VectorMaskTrueCount applied directly to the input of a VectorStoreMask,
// for masks that are not predicate vector masks. The size operand appears in
// the match rule only; the encoding does not read it.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int vec_len_enc = vector_length_encoding(this, $mask);
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_bt, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24396 
// VectorMaskFirstTrue / VectorMaskLastTrue for masks held in an opmask
// register (input type is a predicate vector mask). The ideal opcode passed
// to vector_mask_operation distinguishes the two matched operations.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int vec_len_enc = vector_length_encoding(this, $mask);
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the per-lane byte size.
    int mask_bytes = lane_count * type2aelembytes(lane_bt);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_count, mask_bytes, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24414 
// VectorMaskFirstTrue / VectorMaskLastTrue for masks held in a vector
// register (input type is not a predicate vector mask). The ideal opcode
// passed to vector_mask_operation distinguishes the two matched operations.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int vec_len_enc = vector_length_encoding(this, $mask);
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_bt, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24431 
// VectorMaskFirstTrue / VectorMaskLastTrue applied directly to the input of
// a VectorStoreMask, for masks that are not predicate vector masks. The size
// operand appears in the match rules only; the encoding does not read it.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int vec_len_enc = vector_length_encoding(this, $mask);
    int lane_count = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_count, lane_bt, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24448 
24449 // --------------------------------- Compress/Expand Operations ---------------------------
// CompressV / ExpandV with the mask in a vector register. Selected when
// AVX512VL is not supported and the result vector is at most 32 bytes. The
// ideal opcode tells vector_compress_expand_avx2 which of the two matched
// operations to emit.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ vector_compress_expand_avx2(this->ideal_Opcode(), $dst$$XMMRegister, $src$$XMMRegister,
                                   $mask$$XMMRegister, $rtmp$$Register, $rscratch$$Register,
                                   $perm$$XMMRegister, $xtmp$$XMMRegister, lane_bt, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24465 
// CompressV / ExpandV with the mask in an opmask register. Selected when
// AVX512VL is supported or the result vector is exactly 64 bytes. The ideal
// opcode tells vector_compress_expand which operation to emit.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ vector_compress_expand(this->ideal_Opcode(), $dst$$XMMRegister, $src$$XMMRegister,
                              $mask$$KRegister, false, lane_bt, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24479 
// CompressM on an opmask register. Two general-purpose scratch registers are
// handed to vector_mask_compress and the flags register is killed.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    // The input is expected to be typed as a predicate vector mask.
    assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
    int lane_count = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister,
                            $rtmp1$$Register, $rtmp2$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
24491 
24492 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24493 
// ReverseV (bit reversal) for CPUs without GFNI support. Needs two vector
// scratch registers and one general-purpose scratch register.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ vector_reverse_bit(lane_bt, $dst$$XMMRegister, $src$$XMMRegister,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                          $rtmp$$Register, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24507 
// ReverseV (bit reversal) for CPUs with GFNI support. A 64-bit constant is
// placed in the constant table and passed by address to
// vector_reverse_bit_gfni together with one vector scratch register.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    // NOTE(review): presumably the GF(2) affine matrix that reverses the bits
    // within each byte; confirm against vector_reverse_bit_gfni.
    InternalAddress const_addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(lane_bt, $dst$$XMMRegister, $src$$XMMRegister,
                               const_addr, vec_len_enc, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
24522 
// ReverseBytesV without extra scratch registers. Selected when AVX512BW is
// supported or the result vector is smaller than 64 bytes.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ vector_reverse_byte(lane_bt, $dst$$XMMRegister, $src$$XMMRegister, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24535 
// ReverseBytesV for 64-byte vectors on CPUs without AVX512BW. Needs two
// vector scratch registers and one general-purpose scratch register.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ vector_reverse_byte64(lane_bt, $dst$$XMMRegister, $src$$XMMRegister,
                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                             $rtmp$$Register, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24549 
24550 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24551 
// Unmasked CountLeadingZerosV for the element type / vector size pairs
// accepted by is_clz_non_subword_predicate_evex. No scratch registers are
// used: every temporary slot of the helper receives a no-register value and
// k0 is passed as the mask.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_len_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(lane_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, k0, noreg, true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24565 
// Masked CountLeadingZerosV for the element type / vector size pairs
// accepted by is_clz_non_subword_predicate_evex. src is first copied into
// dst, then the count is emitted under the opmask.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_len_enc = vector_length_encoding(this, $src);
    // NOTE(review): presumably seeds dst so that lanes not selected by the
    // mask keep the source value; confirm against the helper.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vec_len_enc);
    __ vector_count_leading_zeros_evex(lane_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, $mask$$KRegister,
                                       noreg, true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24580 
// CountLeadingZerosV for T_SHORT elements. Requires AVX512CD and either
// AVX512VL or a 64-byte vector. Uses two vector scratch registers; the third
// vector temporary and the general-purpose temporary of the helper are left
// as no-register values and k0 is passed as the mask.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_len_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(lane_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       xnoreg, k0, noreg, true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24596 
// CountLeadingZerosV for T_BYTE elements on CPUs with AVX512VL+BW. Uses
// three vector scratch registers, one opmask scratch register and one
// general-purpose scratch register.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_len_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(lane_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24611 
// CountLeadingZerosV for T_INT elements when AVX512VL is not supported and
// the input vector is smaller than 64 bytes. Uses three vector scratch
// registers; no general-purpose temporary is supplied (noreg).
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_len_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(lane_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, noreg, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24626 
// CountLeadingZerosV for every element type other than T_INT when AVX512VL
// is not supported and the input vector is smaller than 64 bytes. Uses three
// vector scratch registers plus one general-purpose scratch register.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_len_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(lane_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, $rtmp$$Register, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24641 
24642 // ---------------------------------- Vector Masked Operations ------------------------------------
24643 
// Masked vector add, second input in a register. dst is both the first
// input and the result; mask is an opmask (kReg) operand. One rule covers
// all six AddV* element variants.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which matched variant to emit.
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24661 
// Masked vector add with the second input folded in as a memory operand
// (LoadVector). dst is both the first input and the result; mask is an
// opmask (kReg) operand. One rule covers all six AddV* element variants.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which matched variant to emit.
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24679 
// Masked vector XOR, second input in a register. dst is both the first
// input and the result; mask is an opmask (kReg) operand.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24692 
// Masked vector XOR with the second input folded in as a memory operand
// (LoadVector). dst is both the first input and the result; mask is an
// opmask (kReg) operand.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24705 
// Masked vector OR, second input in a register. dst is both the first input
// and the result; mask is an opmask (kReg) operand.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24718 
// Masked vector OR with the second input folded in as a memory operand
// (LoadVector). dst is both the first input and the result; mask is an
// opmask (kReg) operand.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24731 
// Masked vector AND, second input in a register. dst is both the first
// input and the result; mask is an opmask (kReg) operand.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24744 
// Masked vector AND with the second input folded in as a memory operand
// (LoadVector). dst is both the first input and the result; mask is an
// opmask (kReg) operand.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24757 
// Masked vector subtract, second input in a register. dst is both the first
// input and the result; mask is an opmask (kReg) operand. One rule covers
// all six SubV* element variants.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which matched variant to emit.
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24775 
// Masked vector subtract with the second input folded in as a memory
// operand (LoadVector). dst is both the first input and the result; mask is
// an opmask (kReg) operand. One rule covers all six SubV* element variants.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which matched variant to emit.
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24793 
// Masked vector multiply, second input in a register. dst is both the first
// input and the result; mask is an opmask (kReg) operand. Covers the MulVS,
// MulVI, MulVL, MulVF and MulVD variants (there is no byte rule here).
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which matched variant to emit.
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24810 
// Masked vector multiply with the second input folded in as a memory
// operand (LoadVector). dst is both the first input and the result; mask is
// an opmask (kReg) operand. Covers the MulVS, MulVI, MulVL, MulVF and MulVD
// variants (there is no byte rule here).
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which matched variant to emit.
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24827 
// Masked vector square root for float and double elements. The operation is
// unary, so dst is passed to evmasked_op as destination and as both source
// arguments; mask is an opmask (kReg) operand.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24841 
// Masked vector divide for float and double elements, second input in a
// register. dst is both the first input and the result; mask is an opmask
// (kReg) operand.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24855 
// Masked vector divide for float and double elements with the second input
// folded in as a memory operand (LoadVector). dst is both the first input
// and the result; mask is an opmask (kReg) operand.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_len_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_len_enc);
  %}
  ins_pipe( pipe_slow );
%}
24869 
24870 
// Masked vector rotate by an 8-bit immediate. One rule serves both
// RotateLeftV and RotateRightV; the direction is conveyed to evmasked_op
// through the node's ideal opcode.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Immediate overload: the rotate count is the constant $shift.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24884 
// Masked vector rotate with the count supplied in vector register $src2.
// One rule serves both RotateLeftV and RotateRightV; the direction is
// conveyed to evmasked_op through the node's ideal opcode.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24898 
// Masked vector left shift (short / int / long lanes) by an immediate count.
// The rule matches the count only when it appears as LShiftCntV of an immI8.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Immediate overload: the shift count is the constant $shift.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24913 
// Masked vector left shift (short / int / long lanes) with the count in
// $src2, selected only when the shift node is NOT a variable shift.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Trailing `false` mirrors the predicate: this is the non-variable form.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24929 
// Masked vector left shift (short / int / long lanes) with the count in
// $src2, selected only when the shift node IS a variable shift.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Trailing `true` mirrors the predicate: this is the variable-shift form.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24945 
// Masked vector right shift, RShiftV* nodes (short / int / long lanes), by an
// immediate count. The count is matched only as RShiftCntV of an immI8.
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Immediate overload: the shift count is the constant $shift.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24960 
// Masked vector right shift, RShiftV* nodes (short / int / long lanes), with
// the count in $src2; selected only when the node is NOT a variable shift.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Trailing `false` mirrors the predicate: this is the non-variable form.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24976 
// Masked vector right shift, RShiftV* nodes (short / int / long lanes), with
// the count in $src2; selected only when the node IS a variable shift.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Trailing `true` mirrors the predicate: this is the variable-shift form.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24992 
// Masked vector right shift, URShiftV* nodes (short / int / long lanes), by
// an immediate count. The count is matched only as RShiftCntV of an immI8.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Immediate overload: the shift count is the constant $shift.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25007 
// Masked vector right shift, URShiftV* nodes (short / int / long lanes), with
// the count in $src2; selected only when the node is NOT a variable shift.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Trailing `false` mirrors the predicate: this is the non-variable form.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
25023 
// Masked vector right shift, URShiftV* nodes (short / int / long lanes), with
// the count in $src2; selected only when the node IS a variable shift.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Trailing `true` mirrors the predicate: this is the variable-shift form.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
25039 
// Masked vector maximum (MaxV), register form: $dst = max($dst, $src2) under
// control of $mask. $dst is both the first input and the result.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    // Opcode, lane type and vector length are all taken from the matched node.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25052 
// Masked vector maximum (MaxV), memory form: the second input is a
// LoadVector folded into the instruction as the address $src2.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Address overload of evmasked_op: the second operand is read from memory.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25065 
// Masked vector minimum (MinV), register form: $dst = min($dst, $src2) under
// control of $mask. $dst is both the first input and the result.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    // Opcode, lane type and vector length are all taken from the matched node.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25078 
// Masked vector minimum (MinV), memory form: the second input is a
// LoadVector folded into the instruction as the address $src2.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Address overload of evmasked_op: the second operand is read from memory.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25091 
// Masked VectorRearrange: $dst is the first vector input and the result,
// $src2 the second vector input, $mask the k-register mask.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // NOTE(review): this rule passes `false` in the slot where the other
    // masked rules in this file pass `true`; presumably this selects zeroing
    // rather than merging of masked-off lanes -- confirm against evmasked_op.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25104 
// Masked vector absolute value for byte / short / int / long lanes
// (AbsVB / AbsVS / AbsVI / AbsVL), computed in place in $dst.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Unary operation: $dst fills both source slots of evmasked_op.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25120 
// Masked fused multiply-add (FmaVF / FmaVD), register form. The ideal inputs
// arrive as (Binary dst src2) and (Binary src3 mask); $dst is also the result.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Unlike the two-input masked rules, the source slots here are $src2 and
    // $src3; $dst is passed only as the destination.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25135 
// Masked fused multiply-add (FmaVF / FmaVD), memory form: the third input is
// a LoadVector folded into the instruction as the address $src3.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Source slots are $src2 (register) and $src3 (memory); $dst is passed
    // only as the destination.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25150 
// Masked vector compare producing a k-register mask. $cond is the BoolTest
// condition as an immediate; $mask is handed to the compare instruction as
// its mask operand. Lane type and vector length are taken from $src1.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $src1);
    const int vec_enc = vector_length_encoding(this, $src1);

    // Pick the compare instruction from the lane type of the inputs.
    switch (lane_bt) {
      case T_BYTE:
      case T_SHORT:
      case T_INT:
      case T_LONG: {
        // Integral lanes: derive signedness and the integer predicate from
        // $cond once, then choose the instruction by lane width.
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
        if (lane_bt == T_BYTE) {
          __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, is_signed, vec_enc);
        } else if (lane_bt == T_SHORT) {
          __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, is_signed, vec_enc);
        } else if (lane_bt == T_INT) {
          __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, is_signed, vec_enc);
        } else {
          __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, is_signed, vec_enc);
        }
        break;
      }
      case T_FLOAT: {
        // Floating-point lanes use the FP predicate mapping instead.
        const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      case T_DOUBLE: {
        const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      default: assert(false, "%s", type2name(lane_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
25200 
// MaskAll from an int register into a k-register, for masks of at most
// 32 lanes.
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    // Lane count of the mask being produced.
    const int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
25211 
// Mask NOT, matched as XorVMask of $src with MaskAll(-1), for masks shorter
// than 8 lanes on AVX512DQ hardware. Needs one general-purpose and one
// k-register temporary.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    // Five-argument knot overload: takes both temporaries.
    const uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
25223 
// Mask NOT, matched as XorVMask of $src with MaskAll(-1), for the lengths
// that need no temporaries: 8 lanes with AVX512DQ, exactly 16 lanes, or more
// than 16 lanes with AVX512BW.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    // Three-argument knot overload: no temporaries required.
    const uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
25236 
// VectorLongToMask producing a vector-register mask (result type is not a
// predicate-register mask), for masks of at most 8 lanes.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    // Length encoding is derived from the lane count, not from the node.
    const int len_enc = vector_length_encoding(lane_count);
    // No vector temporary is supplied in this variant (xnoreg).
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, lane_count, len_enc);
  %}
  ins_pipe( pipe_slow );
%}
25250 
25251 
// VectorLongToMask producing a vector-register mask (result type is not a
// predicate-register mask), for masks of more than 8 lanes. Uses an extra
// vector temporary and kills the flags register.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    assert(lane_count <= 32, "invalid mask length");
    // Length encoding is derived from the lane count, not from the node.
    const int len_enc = vector_length_encoding(lane_count);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, lane_count, len_enc);
  %}
  ins_pipe( pipe_slow );
%}
25266 
// VectorLongToMask when the result type is a predicate-register mask: the
// long in $src is moved straight into the k-register $dst.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_pvectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    // Single general-purpose-register to k-register move.
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
25276 
// Logical AND / OR / XOR of two k-register masks. The operation is selected
// inside masked_op from the node's ideal opcode.
// NOTE(review): $kscratch is reserved as a TEMP but is not referenced by the
// encoding below.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    // Sanity check: both mask inputs must have the same type.
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint lane_count = Matcher::vector_length(this);
    // Without AVX512DQ, masks shorter than 16 lanes are handled as 16 lanes.
    if (lane_count < 16 && !VM_Version::supports_avx512dq()) {
      lane_count = 16;
    }
    __ masked_op(this->ideal_Opcode(), lane_count, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
25293 
// Masked MacroLogicV (ternary logic), register form. The ideal inputs arrive
// as a right-nested Binary chain: dst, src2, src3, then (func, mask).
// $func is the 8-bit immediate handed to evpternlog.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // $dst is the first logic input as well as the destination.
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25305 
// Masked MacroLogicV (ternary logic), memory form: the third logic input is
// a LoadVector folded into the instruction as the address $src3.
//
// The match rule wraps $src3 in (LoadVector ...), in line with the other
// *_mem_masked rules in this file (e.g. vfma_mem_masked, vdiv_mem_masked).
// Without the wrapper the `memory` operand sits directly in a vector-input
// position of the pattern, so the rule cannot describe a folded vector load.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary (LoadVector src3) (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // Address overload of evpternlog: the third input is read from memory.
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25317 
// CastVV on a k-register (mask) value. Emits no code: zero size, zero cost,
// empty encoding.
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
25328 
// CastVV on a `vec` register value. Emits no code: zero size, zero cost,
// empty encoding.
instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
25339 
// CastVV on a `legVec` register value. Emits no code: zero size, zero cost,
// empty encoding.
instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
25350 
// IsInfiniteF: classify the scalar float in $src with vfpclassss into the
// k-register temporary, then copy that result into the int register $dst.
// The flags register is declared killed.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    // 0x18: presumably the +Inf | -Inf category bits of the vfpclass
    // immediate, consistent with the IsInfiniteF match -- confirm against
    // the ISA manual.
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    // Move the classification result from the k-register into $dst.
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25362 
// IsInfiniteD: classify the scalar double in $src with vfpclasssd into the
// k-register temporary, then copy that result into the int register $dst.
// The flags register is declared killed.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    // 0x18: presumably the +Inf | -Inf category bits of the vfpclass
    // immediate, consistent with the IsInfiniteD match -- confirm against
    // the ISA manual.
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    // Move the classification result from the k-register into $dst.
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25374 
// Signed saturating add / subtract on subword lanes, register operands.
// Add vs. subtract is chosen inside vector_saturating_op from the opcode.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // `false` mirrors the predicate: this rule handles the signed variant.
    __ vector_saturating_op(ideal_opc, lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25390 
// Unsigned saturating add / subtract on subword lanes, register operands.
// Add vs. subtract is chosen inside vector_saturating_op from the opcode.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // `true` mirrors the predicate: this rule handles the unsigned variant.
    __ vector_saturating_op(ideal_opc, lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25406 
// Signed saturating add / subtract on non-subword lanes, EVEX path: selected
// for 64-byte vectors, or any length when AVX512VL is available.
// Uses two vector and two k-register temporaries.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    // Add vs. subtract is conveyed to the helper through the ideal opcode.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_evex(ideal_opc, lane_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25426 
// Signed saturating add / subtract on non-subword lanes, AVX path: selected
// for vectors of at most 32 bytes when AVX512VL is not available.
// Uses four vector temporaries.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    // Add vs. subtract is conveyed to the helper through the ideal opcode.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_avx(ideal_opc, lane_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25445 
// Unsigned saturating add on non-subword lanes, EVEX path: selected for
// 64-byte vectors, or any length when AVX512VL is available.
// Uses two vector temporaries and one k-register temporary.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    // Only SaturatingAddV is matched, so no opcode is passed to the helper.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_evex(lane_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25462 
// Unsigned saturating add on non-subword lanes, AVX path: selected for
// vectors of at most 32 bytes when AVX512VL is not available.
// Uses three vector temporaries.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    // Only SaturatingAddV is matched, so no opcode is passed to the helper.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_avx(lane_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25479 
// Unsigned saturating subtract on non-subword lanes, EVEX path: selected for
// 64-byte vectors, or any length when AVX512VL is available.
// Uses one k-register temporary; $dst is not declared TEMP in this rule.
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    // Only SaturatingSubV is matched, so no opcode is passed to the helper.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_evex(lane_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25496 
// Unsigned saturating subtract on non-subword lanes, AVX path: selected for
// vectors of at most 32 bytes when AVX512VL is not available.
// Uses two vector temporaries.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // Only SaturatingSubV is matched, so no opcode is passed to the helper.
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_avx(lane_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25513 
// Saturating vector add/sub for subword element types where the second operand
// is a vector loaded from memory (LoadVector folded into the rule).
// Predicate: the node is a SaturatingVector that is NOT unsigned.
// One rule serves both SaturatingAddV and SaturatingSubV; the ideal opcode of
// the matched node is forwarded to the helper.
25514 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25515 %{
25516   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25517             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25518   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25519   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25520   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25521   ins_encode %{
25522     int vlen_enc = vector_length_encoding(this);
25523     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // The boolean argument is false here and true in the unsigned sibling rule;
    // presumably it is the helper's is_unsigned flag - confirm.
25524     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25525                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25526   %}
25527   ins_pipe(pipe_slow);
25528 %}
25529 
// Unsigned saturating vector add/sub for subword element types where the second
// operand is a vector loaded from memory (LoadVector folded into the rule).
// Predicate: the node is a SaturatingVector that IS unsigned.
// One rule serves both SaturatingAddV and SaturatingSubV; the ideal opcode of
// the matched node is forwarded to the helper.
25530 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25531 %{
25532   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25533             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25534   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25535   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25536   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25537   ins_encode %{
25538     int vlen_enc = vector_length_encoding(this);
25539     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // The boolean argument is true here and false in the signed sibling rule;
    // presumably it is the helper's is_unsigned flag - confirm.
25540     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25541                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25542   %}
25543   ins_pipe(pipe_slow);
25544 %}
25545 
// Masked saturating vector add/sub for subword element types, register operands.
// Predicate: the node is a SaturatingVector that is NOT unsigned.
// $dst is both the first input and the result (it appears inside the Binary
// sub-tree of the match rule); $mask is the opmask register operand.
25546 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25547   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25548             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25549   match(Set dst (SaturatingAddV (Binary dst src) mask));
25550   match(Set dst (SaturatingSubV (Binary dst src) mask));
25551   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25552   ins_encode %{
25553     int vlen_enc = vector_length_encoding(this);
25554     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // $dst is passed twice (destination and first source). The two trailing
    // booleans are (false, true): the first differs between the signed and
    // unsigned rules; the second is presumably the merge-masking flag - confirm
    // against the helper's declaration.
25555     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25556                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25557   %}
25558   ins_pipe( pipe_slow );
25559 %}
25560 
// Masked unsigned saturating vector add/sub for subword element types,
// register operands.
// Predicate: the node is a SaturatingVector that IS unsigned.
// $dst is both the first input and the result (it appears inside the Binary
// sub-tree of the match rule); $mask is the opmask register operand.
25561 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25562   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25563             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25564   match(Set dst (SaturatingAddV (Binary dst src) mask));
25565   match(Set dst (SaturatingSubV (Binary dst src) mask));
25566   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25567   ins_encode %{
25568     int vlen_enc = vector_length_encoding(this);
25569     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // $dst is passed twice (destination and first source). The two trailing
    // booleans are (true, true): the first differs between the signed and
    // unsigned rules; the second is presumably the merge-masking flag - confirm
    // against the helper's declaration.
25570     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25571                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25572   %}
25573   ins_pipe( pipe_slow );
25574 %}
25575 
// Masked saturating vector add/sub for subword element types where the second
// operand is a vector loaded from memory (LoadVector folded into the rule).
// Predicate: the node is a SaturatingVector that is NOT unsigned.
// $dst is both the first input and the result; $mask is the opmask operand.
25576 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25577   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25578             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25579   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25580   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25581   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25582   ins_encode %{
25583     int vlen_enc = vector_length_encoding(this);
25584     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Same call shape as the register form, with the second source given as an
    // Address. Trailing booleans are (false, true); the second is presumably
    // the merge-masking flag - confirm.
25585     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25586                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25587   %}
25588   ins_pipe( pipe_slow );
25589 %}
25590 
// Masked unsigned saturating vector add/sub for subword element types where the
// second operand is a vector loaded from memory (LoadVector folded into the rule).
// Predicate: the node is a SaturatingVector that IS unsigned.
// $dst is both the first input and the result; $mask is the opmask operand.
25591 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25592   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25593             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25594   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25595   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25596   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25597   ins_encode %{
25598     int vlen_enc = vector_length_encoding(this);
25599     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Same call shape as the register form, with the second source given as an
    // Address. Trailing booleans are (true, true); the second is presumably
    // the merge-masking flag - confirm.
25600     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25601                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25602   %}
25603   ins_pipe( pipe_slow );
25604 %}
25605 
// SelectFromTwoVector: $index is both the index-vector input and the register
// that receives the result (it is the Set target and also appears inside the
// Binary sub-tree); $src1 and $src2 are the two source vectors.
// The rule has no predicate in this definition.
25606 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25607 %{
25608   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25609   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25610   ins_encode %{
25611     int vlen_enc = vector_length_encoding(this);
25612     BasicType bt = Matcher::vector_element_basic_type(this);
25613     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25614   %}
25615   ins_pipe(pipe_slow);
25616 %}
25617 
// ReinterpretS2HF: moves the value held in general-purpose register $src into
// XMM register $dst with a single evmovw.
25618 instruct reinterpretS2HF(regF dst, rRegI src)
25619 %{
25620   match(Set dst (ReinterpretS2HF src));
25621   format %{ "evmovw $dst, $src" %}
25622   ins_encode %{
25623     __ evmovw($dst$$XMMRegister, $src$$Register);
25624   %}
25625   ins_pipe(pipe_slow);
25626 %}
25627 
// ReinterpretHF2S: moves the value held in XMM register $src into
// general-purpose register $dst with evmovw, then narrows $dst to T_SHORT.
// Note that the format string only mentions the evmovw, not the narrowing step.
25628 instruct reinterpretHF2S(rRegI dst, regF src)
25629 %{
25630   match(Set dst (ReinterpretHF2S src));
25631   format %{ "evmovw $dst, $src" %}
25632   ins_encode %{
25633     __ evmovw($dst$$Register, $src$$XMMRegister);
    // NOTE(review): presumably sign-extends the low 16 bits so the int register
    // holds a canonical short value - confirm against narrow_subword_type.
25634     __ narrow_subword_type($dst$$Register, T_SHORT);
25635   %}
25636   ins_pipe(pipe_slow);
25637 %}
25638 
// Fused rule for ReinterpretS2HF(ConvF2HF src): both operands are XMM (regF)
// registers, so the conversion is emitted as one vcvtps2ph and no
// general-purpose register is named in the rule.
25639 instruct convF2HFAndS2HF(regF dst, regF src)
25640 %{
25641   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25642   format %{ "convF2HFAndS2HF $dst, $src" %}
25643   ins_encode %{
    // NOTE(review): imm8 0x04 - per the VCVTPS2PH encoding, bit 2 presumably
    // selects the MXCSR rounding mode; confirm against the instruction reference.
25644     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25645   %}
25646   ins_pipe(pipe_slow);
25647 %}
25648 
// Fused rule for ConvHF2F(ReinterpretHF2S src): both operands are XMM (regF)
// registers, so the conversion is emitted as one 128-bit vcvtph2ps and no
// general-purpose register is named in the rule.
25649 instruct convHF2SAndHF2F(regF dst, regF src)
25650 %{
25651   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25652   format %{ "convHF2SAndHF2F $dst, $src" %}
25653   ins_encode %{
25654     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25655   %}
25656   ins_pipe(pipe_slow);
25657 %}
25658 
// Scalar half-float square root (SqrtHF), emitted as a single vsqrtsh from
// $src into $dst. The rule has no predicate in this definition.
25659 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25660 %{
25661   match(Set dst (SqrtHF src));
25662   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25663   ins_encode %{
25664     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25665   %}
25666   ins_pipe(pipe_slow);
25667 %}
25668 
// Scalar half-float binary arithmetic: one rule for AddHF, DivHF, MulHF and
// SubHF. The ideal opcode of the matched node is handed to efp16sh, which is
// expected to pick the corresponding instruction.
25669 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25670 %{
25671   match(Set dst (AddHF src1 src2));
25672   match(Set dst (DivHF src1 src2));
25673   match(Set dst (MulHF src1 src2));
25674   match(Set dst (SubHF src1 src2));
25675   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25676   ins_encode %{
25677     int opcode = this->ideal_Opcode();
25678     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25679   %}
25680   ins_pipe(pipe_slow);
25681 %}
25682 
// Scalar half-float min/max (MinHF / MaxHF) for CPUs that support AVX10.2.
// No temporaries are needed; the helper is called with opmask k0.
// The non-AVX10.2 case is handled by scalar_minmax_HF_reg.
25683 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25684 %{
25685   predicate(VM_Version::supports_avx10_2());
25686   match(Set dst (MaxHF src1 src2));
25687   match(Set dst (MinHF src1 src2));
25688 
25689   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25690   ins_encode %{
    // The ideal opcode tells the helper whether this is a min or a max.
25691     int opcode = this->ideal_Opcode();
25692     __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25693   %}
25694   ins_pipe( pipe_slow );
25695 %}
25696 
// Scalar half-float min/max (MinHF / MaxHF) for CPUs WITHOUT AVX10.2.
// Effects: $dst is TEMP_DEF, and one opmask ($ktmp) plus two XMM scratch
// registers ($xtmp1, $xtmp2) are reserved for the helper.
// NOTE(review): TEMP_DEF presumably prevents $dst from sharing a register with
// $src1/$src2 because the helper writes $dst before it is done reading the
// inputs - confirm.
25697 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25698 %{
25699   predicate(!VM_Version::supports_avx10_2());
25700   match(Set dst (MaxHF src1 src2));
25701   match(Set dst (MinHF src1 src2));
25702   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25703 
25704   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25705   ins_encode %{
    // The ideal opcode tells the helper whether this is a min or a max.
25706     int opcode = this->ideal_Opcode();
25707     __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25708                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25709   %}
25710   ins_pipe( pipe_slow );
25711 %}
25712 
// Scalar half-float fused multiply-add (FmaHF).
// Per the format string the result is $dst = $dst * $src1 + $src2, i.e. $dst is
// both a multiplicand and the destination; in the ideal tree $src2 is the first
// input and (Binary dst src1) the second.
25713 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25714 %{
25715   match(Set dst (FmaHF  src2 (Binary dst src1)));
25716   effect(DEF dst);
25717   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25718   ins_encode %{
    // Operand order passed to the assembler is (dst, src2, src1).
25719     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25720   %}
25721   ins_pipe( pipe_slow );
25722 %}
25723 
25724 
// Vector half-float square root (SqrtVHF), register source.
// Emitted as evsqrtph with the vector length encoding derived from this node.
25725 instruct vector_sqrt_HF_reg(vec dst, vec src)
25726 %{
25727   match(Set dst (SqrtVHF src));
25728   format %{ "vector_sqrt_fp16 $dst, $src" %}
25729   ins_encode %{
25730     int vlen_enc = vector_length_encoding(this);
25731     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25732   %}
25733   ins_pipe(pipe_slow);
25734 %}
25735 
// Vector half-float square root (SqrtVHF), memory source.
// Folds a VectorReinterpret(LoadVector src) input into the instruction so that
// evsqrtph reads its operand directly from $src's address.
25736 instruct vector_sqrt_HF_mem(vec dst, memory src)
25737 %{
25738   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25739   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25740   ins_encode %{
25741     int vlen_enc = vector_length_encoding(this);
25742     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25743   %}
25744   ins_pipe(pipe_slow);
25745 %}
25746 
// Vector half-float binary arithmetic, register operands: one rule for AddVHF,
// DivVHF, MulVHF and SubVHF. The ideal opcode of the matched node is handed to
// evfp16ph, which is expected to pick the corresponding instruction.
25747 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25748 %{
25749   match(Set dst (AddVHF src1 src2));
25750   match(Set dst (DivVHF src1 src2));
25751   match(Set dst (MulVHF src1 src2));
25752   match(Set dst (SubVHF src1 src2));
25753   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25754   ins_encode %{
25755     int vlen_enc = vector_length_encoding(this);
25756     int opcode = this->ideal_Opcode();
25757     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25758   %}
25759   ins_pipe(pipe_slow);
25760 %}
25761 
25762 
// Vector half-float binary arithmetic with the second operand in memory: one
// rule for AddVHF, DivVHF, MulVHF and SubVHF whose second input is
// VectorReinterpret(LoadVector src2). The load is folded so evfp16ph takes
// $src2 as an Address.
25763 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25764 %{
25765   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25766   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25767   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25768   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25769   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25770   ins_encode %{
25771     int vlen_enc = vector_length_encoding(this);
25772     int opcode = this->ideal_Opcode();
25773     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25774   %}
25775   ins_pipe(pipe_slow);
25776 %}
25777 
// Vector half-float fused multiply-add (FmaVHF), register operands.
// Per the format string the result is $dst = $dst * $src1 + $src2, i.e. $dst is
// both a multiplicand and the destination.
// Unlike scalar_fma_HF_reg, this rule declares no explicit effect().
25778 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25779 %{
25780   match(Set dst (FmaVHF src2 (Binary dst src1)));
25781   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25782   ins_encode %{
25783     int vlen_enc = vector_length_encoding(this);
    // Operand order passed to the assembler is (dst, src2, src1).
25784     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25785   %}
25786   ins_pipe( pipe_slow );
25787 %}
25788 
// Vector half-float fused multiply-add (FmaVHF) where the multiplier $src1 is a
// vector loaded from memory (VectorReinterpret(LoadVector src1) is folded).
// Per the format string the result is $dst = $dst * $src1 + $src2.
25789 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25790 %{
25791   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25792   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25793   ins_encode %{
25794     int vlen_enc = vector_length_encoding(this);
    // Operand order passed to the assembler is (dst, src2, address of src1).
25795     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25796   %}
25797   ins_pipe( pipe_slow );
25798 %}
25799 
// Vector half-float min/max (MinVHF / MaxVHF) for CPUs that support AVX10.2,
// with the second operand loaded from memory
// (VectorReinterpret(LoadVector src2) is folded into the rule).
// No temporaries are needed; the helper is called with opmask k0.
25800 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25801 %{
25802   predicate(VM_Version::supports_avx10_2());
25803   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25804   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25805   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25806   ins_encode %{
25807     int vlen_enc = vector_length_encoding(this);
    // The ideal opcode tells the helper whether this is a min or a max.
25808     int opcode = this->ideal_Opcode();
25809     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25810                             k0, vlen_enc);
25811   %}
25812   ins_pipe( pipe_slow );
25813 %}
25814 
// Vector half-float min/max (MinVHF / MaxVHF) for CPUs that support AVX10.2,
// register operands. No temporaries are needed; the helper is called with
// opmask k0. The non-AVX10.2 case is handled by vector_minmax_HF_reg.
25815 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25816 %{
25817   predicate(VM_Version::supports_avx10_2());
25818   match(Set dst (MinVHF src1 src2));
25819   match(Set dst (MaxVHF src1 src2));
25820   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25821   ins_encode %{
25822     int vlen_enc = vector_length_encoding(this);
    // The ideal opcode tells the helper whether this is a min or a max.
25823     int opcode = this->ideal_Opcode();
25824     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25825                             k0, vlen_enc);
25826   %}
25827   ins_pipe( pipe_slow );
25828 %}
25829 
// Vector half-float min/max (MinVHF / MaxVHF) for CPUs WITHOUT AVX10.2.
// Effects: $dst is TEMP_DEF, and one opmask ($ktmp) plus two vector scratch
// registers ($xtmp1, $xtmp2) are reserved for the helper.
25830 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25831 %{
25832   predicate(!VM_Version::supports_avx10_2());
25833   match(Set dst (MinVHF src1 src2));
25834   match(Set dst (MaxVHF src1 src2));
25835   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25836   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25837   ins_encode %{
25838     int vlen_enc = vector_length_encoding(this);
25839     int opcode = this->ideal_Opcode();
    // NOTE(review): $src2 is passed ahead of $src1 here, whereas the scalar rule
    // (scalar_minmax_HF_reg) passes $src1 then $src2 to its helper. Confirm the
    // swapped order is what vminmax_fp16 expects.
25840     __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25841                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25842   %}
25843   ins_pipe( pipe_slow );
25844 %}
25845 
25846 //----------PEEPHOLE RULES-----------------------------------------------------
25847 // These must follow all instruction definitions as they use the names
25848 // defined in the instructions definitions.
25849 //
25850 // peeppredicate ( rule_predicate );
25851 // // the peephole rule is ignored unless this predicate holds
25852 //
25853 // peepmatch ( root_instr_name [preceding_instruction]* );
25854 //
25855 // peepprocedure ( procedure_name );
25856 // // provide a procedure name to perform the optimization. The procedure should
25857 // // reside in the architecture dependent peephole file and have the
25858 // // signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...),
25859 // // with the arguments being the basic block, the current node index inside the
25860 // // block, the register allocator, a function that returns a new node of the
25861 // // kind defined in peepreplace when invoked, and the rules of the nodes
25862 // // appearing in the corresponding peepmatch. The function returns true if
25863 // // successful, else false.
25864 //
25865 // peepconstraint %{
25866 // (instruction_number.operand_name relational_op instruction_number.operand_name
25867 //  [, ...] );
25868 // // instruction numbers are zero-based using left to right order in peepmatch
25869 //
25870 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25871 // // provide an instruction_number.operand_name for each operand that appears
25872 // // in the replacement instruction's match rule
25873 //
25874 // ---------VM FLAGS---------------------------------------------------------
25875 //
25876 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25877 //
25878 // Each peephole rule is given an identifying number starting with zero and
25879 // increasing by one in the order seen by the parser.  An individual peephole
25880 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25881 // on the command-line.
25882 //
25883 // ---------CURRENT LIMITATIONS----------------------------------------------
25884 //
25885 // Only transformations inside a basic block (do we need more for peephole)
25886 //
25887 // ---------EXAMPLE----------------------------------------------------------
25888 //
25889 // // pertinent parts of existing instructions in architecture description
25890 // instruct movI(rRegI dst, rRegI src)
25891 // %{
25892 //   match(Set dst (CopyI src));
25893 // %}
25894 //
25895 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25896 // %{
25897 //   match(Set dst (AddI dst src));
25898 //   effect(KILL cr);
25899 // %}
25900 //
25901 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25902 // %{
25903 //   match(Set dst (AddI dst src));
25904 // %}
25905 //
25906 // 1. Simple replacement
25907 // - Only match adjacent instructions in same basic block
25908 // - Only equality constraints
25909 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25910 // - Only one replacement instruction
25911 //
25912 // // Change (inc mov) to lea
25913 // peephole %{
25914 //   // lea should only be emitted when beneficial
25915 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25916 //   // increment preceded by register-register move
25917 //   peepmatch ( incI_rReg movI );
25918 //   // require that the destination register of the increment
25919 //   // match the destination register of the move
25920 //   peepconstraint ( 0.dst == 1.dst );
25921 //   // construct a replacement instruction that sets
25922 //   // the destination to ( move's source register + one )
25923 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25924 // %}
25925 //
25926 // 2. Procedural replacement
25927 // - More flexible finding of relevant nodes
25928 // - More flexible constraints
25929 // - More flexible transformations
25930 // - May utilise architecture-dependent API more effectively
25931 // - Currently only one replacement instruction due to adlc parsing capabilities
25932 //
25933 // // Change (inc mov) to lea
25934 // peephole %{
25935 //   // lea should only be emitted when beneficial
25936 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25937 //   // the rule numbers of these nodes inside are passed into the function below
25938 //   peepmatch ( incI_rReg movI );
25939 //   // the method that takes the responsibility of transformation
25940 //   peepprocedure ( inc_mov_to_lea );
25941 //   // the replacement is a leaI_rReg_immI, a lambda upon invoked creating this
25942 //   // node is passed into the function above
25943 //   peepreplace ( leaI_rReg_immI() );
25944 // %}
25945 
25946 // These instructions are not matched by the matcher but are used by the peephole rules
// 32-bit register + register add expressed as "leal dst, [src1 + src2]".
// predicate(false) means the matcher never selects this rule; it is only
// instantiated as the replacement of the lea_coalesce_reg peephole for
// addI_rReg.
25947 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25948 %{
25949   predicate(false);
25950   match(Set dst (AddI src1 src2));
25951   format %{ "leal    $dst, [$src1 + $src2]" %}
25952   ins_encode %{
25953     Register dst = $dst$$Register;
25954     Register src1 = $src1$$Register;
25955     Register src2 = $src2$$Register;
    // rbp/r13 are avoided as the base register: if src1 is one of them the
    // roles are swapped and src2 becomes the base.
    // NOTE(review): presumably because rbp/r13 as a base require an explicit
    // displacement in the x86 addressing encoding - confirm.
25956     if (src1 != rbp && src1 != r13) {
25957       __ leal(dst, Address(src1, src2, Address::times_1));
25958     } else {
      // Both operands being rbp/r13 is not expected to occur.
25959       assert(src2 != rbp && src2 != r13, "");
25960       __ leal(dst, Address(src2, src1, Address::times_1));
25961     }
25962   %}
25963   ins_pipe(ialu_reg_reg);
25964 %}
25965 
// 32-bit register + immediate add expressed as "leal dst, [src1 + imm]".
// predicate(false) means the matcher never selects this rule; it is only
// instantiated as the replacement of the lea_coalesce_imm peepholes for
// addI_rReg_imm, incI_rReg and decI_rReg.
25966 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25967 %{
25968   predicate(false);
25969   match(Set dst (AddI src1 src2));
25970   format %{ "leal    $dst, [$src1 + $src2]" %}
25971   ins_encode %{
    // The immediate becomes the displacement of a base-only address.
25972     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25973   %}
25974   ins_pipe(ialu_reg_reg);
25975 %}
25976 
// 32-bit left shift by a small constant expressed as a scaled-index leal.
// predicate(false) means the matcher never selects this rule; it is only
// instantiated as the replacement of the lea_coalesce_imm peephole for
// salI_rReg_immI2.
25977 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25978 %{
25979   predicate(false);
25980   match(Set dst (LShiftI src shift));
25981   format %{ "leal    $dst, [$src << $shift]" %}
25982   ins_encode %{
    // The shift count is reinterpreted directly as an Address::ScaleFactor.
25983     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25984     Register src = $src$$Register;
    // For a scale of 2, [src + src*1] is used instead of [src*2] whenever src
    // is usable as a base (i.e. is neither rbp nor r13); every other case uses
    // a scaled index with no base register.
25985     if (scale == Address::times_2 && src != rbp && src != r13) {
25986       __ leal($dst$$Register, Address(src, src, Address::times_1));
25987     } else {
25988       __ leal($dst$$Register, Address(noreg, src, scale));
25989     }
25990   %}
25991   ins_pipe(ialu_reg_reg);
25992 %}
25993 
// 64-bit register + register add expressed as "leaq dst, [src1 + src2]".
// predicate(false) means the matcher never selects this rule; it is only
// instantiated as the replacement of the lea_coalesce_reg peephole for
// addL_rReg.
25994 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25995 %{
25996   predicate(false);
25997   match(Set dst (AddL src1 src2));
25998   format %{ "leaq    $dst, [$src1 + $src2]" %}
25999   ins_encode %{
26000     Register dst = $dst$$Register;
26001     Register src1 = $src1$$Register;
26002     Register src2 = $src2$$Register;
    // rbp/r13 are avoided as the base register: if src1 is one of them the
    // roles are swapped and src2 becomes the base.
    // NOTE(review): presumably because rbp/r13 as a base require an explicit
    // displacement in the x86 addressing encoding - confirm.
26003     if (src1 != rbp && src1 != r13) {
26004       __ leaq(dst, Address(src1, src2, Address::times_1));
26005     } else {
      // Both operands being rbp/r13 is not expected to occur.
26006       assert(src2 != rbp && src2 != r13, "");
26007       __ leaq(dst, Address(src2, src1, Address::times_1));
26008     }
26009   %}
26010   ins_pipe(ialu_reg_reg);
26011 %}
26012 
// 64-bit register + immediate add expressed as "leaq dst, [src1 + imm]".
// predicate(false) means the matcher never selects this rule; it is only
// instantiated as the replacement of the lea_coalesce_imm peepholes for
// addL_rReg_imm, incL_rReg and decL_rReg.
26013 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
26014 %{
26015   predicate(false);
26016   match(Set dst (AddL src1 src2));
26017   format %{ "leaq    $dst, [$src1 + $src2]" %}
26018   ins_encode %{
    // The immediate becomes the displacement of a base-only address.
26019     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
26020   %}
26021   ins_pipe(ialu_reg_reg);
26022 %}
26023 
// 64-bit left shift by a small constant expressed as a scaled-index leaq.
// predicate(false) means the matcher never selects this rule; it is only
// instantiated as the replacement of the lea_coalesce_imm peephole for
// salL_rReg_immI2.
26024 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
26025 %{
26026   predicate(false);
26027   match(Set dst (LShiftL src shift));
26028   format %{ "leaq    $dst, [$src << $shift]" %}
26029   ins_encode %{
    // The shift count is reinterpreted directly as an Address::ScaleFactor.
26030     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
26031     Register src = $src$$Register;
    // For a scale of 2, [src + src*1] is used instead of [src*2] whenever src
    // is usable as a base (i.e. is neither rbp nor r13); every other case uses
    // a scaled index with no base register.
26032     if (scale == Address::times_2 && src != rbp && src != r13) {
26033       __ leaq($dst$$Register, Address(src, src, Address::times_1));
26034     } else {
26035       __ leaq($dst$$Register, Address(noreg, src, scale));
26036     }
26037   %}
26038   ins_pipe(ialu_reg_reg);
26039 %}
26040 
26041 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
26042 // sal}) with lea instructions. The {add, sal} rules are beneficial in
26043 // processors with at least partial ALU support for lea
26044 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
26045 // beneficial for processors with full ALU support
26046 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
26047 
// int add (reg, reg) -> leal. Rooted at addI_rReg; lea_coalesce_reg performs
// the transformation and builds a leaI_rReg_rReg_peep as the replacement.
// Enabled only when VM_Version::supports_fast_2op_lea().
26048 peephole
26049 %{
26050   peeppredicate(VM_Version::supports_fast_2op_lea());
26051   peepmatch (addI_rReg);
26052   peepprocedure (lea_coalesce_reg);
26053   peepreplace (leaI_rReg_rReg_peep());
26054 %}
26055 
// int add (reg, imm) -> leal. Rooted at addI_rReg_imm; lea_coalesce_imm
// performs the transformation and builds a leaI_rReg_immI_peep as the
// replacement. Enabled only when VM_Version::supports_fast_2op_lea().
26056 peephole
26057 %{
26058   peeppredicate(VM_Version::supports_fast_2op_lea());
26059   peepmatch (addI_rReg_imm);
26060   peepprocedure (lea_coalesce_imm);
26061   peepreplace (leaI_rReg_immI_peep());
26062 %}
26063 
// int increment -> leal. Rooted at incI_rReg; lea_coalesce_imm performs the
// transformation and builds a leaI_rReg_immI_peep as the replacement.
// Enabled only with fast 3-operand lea support or on Intel Cascade Lake.
26064 peephole
26065 %{
26066   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26067                 VM_Version::is_intel_cascade_lake());
26068   peepmatch (incI_rReg);
26069   peepprocedure (lea_coalesce_imm);
26070   peepreplace (leaI_rReg_immI_peep());
26071 %}
26072 
// int decrement -> leal. Rooted at decI_rReg; lea_coalesce_imm performs the
// transformation and builds a leaI_rReg_immI_peep as the replacement.
// Enabled only with fast 3-operand lea support or on Intel Cascade Lake.
26073 peephole
26074 %{
26075   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26076                 VM_Version::is_intel_cascade_lake());
26077   peepmatch (decI_rReg);
26078   peepprocedure (lea_coalesce_imm);
26079   peepreplace (leaI_rReg_immI_peep());
26080 %}
26081 
// int shift-left by small constant -> leal. Rooted at salI_rReg_immI2;
// lea_coalesce_imm performs the transformation and builds a
// leaI_rReg_immI2_peep as the replacement.
// Enabled only when VM_Version::supports_fast_2op_lea().
26082 peephole
26083 %{
26084   peeppredicate(VM_Version::supports_fast_2op_lea());
26085   peepmatch (salI_rReg_immI2);
26086   peepprocedure (lea_coalesce_imm);
26087   peepreplace (leaI_rReg_immI2_peep());
26088 %}
26089 
// long add (reg, reg) -> leaq. Rooted at addL_rReg; lea_coalesce_reg performs
// the transformation and builds a leaL_rReg_rReg_peep as the replacement.
// Enabled only when VM_Version::supports_fast_2op_lea().
26090 peephole
26091 %{
26092   peeppredicate(VM_Version::supports_fast_2op_lea());
26093   peepmatch (addL_rReg);
26094   peepprocedure (lea_coalesce_reg);
26095   peepreplace (leaL_rReg_rReg_peep());
26096 %}
26097 
// long add (reg, imm) -> leaq. Rooted at addL_rReg_imm; lea_coalesce_imm
// performs the transformation and builds a leaL_rReg_immL32_peep as the
// replacement. Enabled only when VM_Version::supports_fast_2op_lea().
26098 peephole
26099 %{
26100   peeppredicate(VM_Version::supports_fast_2op_lea());
26101   peepmatch (addL_rReg_imm);
26102   peepprocedure (lea_coalesce_imm);
26103   peepreplace (leaL_rReg_immL32_peep());
26104 %}
26105 
// long increment -> leaq. Rooted at incL_rReg; lea_coalesce_imm performs the
// transformation and builds a leaL_rReg_immL32_peep as the replacement.
// Enabled only with fast 3-operand lea support or on Intel Cascade Lake.
26106 peephole
26107 %{
26108   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26109                 VM_Version::is_intel_cascade_lake());
26110   peepmatch (incL_rReg);
26111   peepprocedure (lea_coalesce_imm);
26112   peepreplace (leaL_rReg_immL32_peep());
26113 %}
26114 
// long decrement -> leaq. Rooted at decL_rReg; lea_coalesce_imm performs the
// transformation and builds a leaL_rReg_immL32_peep as the replacement.
// Enabled only with fast 3-operand lea support or on Intel Cascade Lake.
26115 peephole
26116 %{
26117   peeppredicate(VM_Version::supports_fast_3op_lea() ||
26118                 VM_Version::is_intel_cascade_lake());
26119   peepmatch (decL_rReg);
26120   peepprocedure (lea_coalesce_imm);
26121   peepreplace (leaL_rReg_immL32_peep());
26122 %}
26123 
// long shift-left by small constant -> leaq. Rooted at salL_rReg_immI2;
// lea_coalesce_imm performs the transformation and builds a
// leaL_rReg_immI2_peep as the replacement.
// Enabled only when VM_Version::supports_fast_2op_lea().
26124 peephole
26125 %{
26126   peeppredicate(VM_Version::supports_fast_2op_lea());
26127   peepmatch (salL_rReg_immI2);
26128   peepprocedure (lea_coalesce_imm);
26129   peepreplace (leaL_rReg_immI2_peep());
26130 %}
26131 
// Rooted at leaPCompressedOopOffset and handled by lea_remove_redundant.
// The rule has neither a peeppredicate nor a peepreplace, so no CPU-feature
// gate is declared here and no replacement instruction is named.
26132 peephole
26133 %{
26134   peepmatch (leaPCompressedOopOffset);
26135   peepprocedure (lea_remove_redundant);
26136 %}
26137 
// Rooted at leaP8Narrow and handled by lea_remove_redundant.
// The rule has neither a peeppredicate nor a peepreplace, so no CPU-feature
// gate is declared here and no replacement instruction is named.
26138 peephole
26139 %{
26140   peepmatch (leaP8Narrow);
26141   peepprocedure (lea_remove_redundant);
26142 %}
26143 
// Rooted at leaP32Narrow and handled by lea_remove_redundant.
// The rule has neither a peeppredicate nor a peepreplace, so no CPU-feature
// gate is declared here and no replacement instruction is named.
26144 peephole
26145 %{
26146   peepmatch (leaP32Narrow);
26147   peepprocedure (lea_remove_redundant);
26148 %}
26149 
26150 // These peephole rules match instructions which set flags and are followed by a testI/L_reg.
26151 // The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
26152 
26153 //int variant
// Rooted at testI_reg (32-bit test); test_may_remove decides whether the test
// can be dropped. No peeppredicate or peepreplace is declared for this rule.
26154 peephole
26155 %{
26156   peepmatch (testI_reg);
26157   peepprocedure (test_may_remove);
26158 %}
26159 
26160 //long variant
// Rooted at testL_reg (64-bit test); test_may_remove decides whether the test
// can be dropped. No peeppredicate or peepreplace is declared for this rule.
26161 peephole
26162 %{
26163   peepmatch (testL_reg);
26164   peepprocedure (test_may_remove);
26165 %}
26166 
26167 
26168 //----------SMARTSPILL RULES---------------------------------------------------
26169 // These must follow all instruction definitions as they use the names
26170 // defined in the instructions definitions.